; RUN: llc -march=amdgcn -mcpu=hawaii -start-after=sink -mattr=+flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-SAFE -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -enable-no-signed-zeros-fp-math -march=amdgcn -mcpu=hawaii -mattr=+flat-for-global -start-after=sink -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-NSZ -check-prefix=SI -check-prefix=FUNC %s

; RUN: llc -march=amdgcn -mcpu=fiji -start-after=sink -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-SAFE -check-prefix=VI -check-prefix=FUNC %s
; RUN: llc -enable-no-signed-zeros-fp-math -march=amdgcn -mcpu=fiji -start-after=sink -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-NSZ -check-prefix=VI -check-prefix=FUNC %s

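; GCN-SAFE covers the default run lines (signed zeros respected); GCN-NSZ
; covers the -enable-no-signed-zeros-fp-math run lines. SI checks the hawaii
; (gfx7) output and VI the fiji (gfx8) output where they differ.
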
; --------------------------------------------------------------------------------
; fadd tests
; --------------------------------------------------------------------------------

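; Folding the fneg into the fadd (e.g. -(a + b) -> -a - b) can flip the sign
; of a zero result: with a = 0.0, b = -0.0, -(a + b) is -0.0 but -a - b is
; +0.0. The safe lowering therefore negates after the add (add + xor), while
; the no-signed-zeros lowering folds the negation into the operands.
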
; GCN-LABEL: {{^}}v_fneg_add_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]

; GCN-SAFE: v_add_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[ADD]]

; GCN-NSZ: v_sub_f32_e64 [[RESULT:v[0-9]+]], -[[A]], [[B]]
; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_add_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %add = fadd float %a, %b
  %fneg = fneg float %add
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_add_store_use_add_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_add_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN-DAG: v_xor_b32_e32 [[NEG_ADD:v[0-9]+]], 0x80000000, [[ADD]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_ADD]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
define amdgpu_kernel void @v_fneg_add_store_use_add_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %add = fadd float %a, %b
  %fneg = fneg float %add
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %add, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_add_multi_use_add_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]

; GCN-SAFE: v_add_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN-SAFE: v_xor_b32_e32 [[NEG_ADD:v[0-9]+]], 0x80000000, [[ADD]]
; GCN-SAFE: v_mul_f32_e32 [[MUL:v[0-9]+]], 4.0, [[ADD]]

; GCN-NSZ: v_sub_f32_e64 [[NEG_ADD:v[0-9]+]], -[[A]], [[B]]
; GCN-NSZ-NEXT: v_mul_f32_e32 [[MUL:v[0-9]+]], -4.0, [[NEG_ADD]]

; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_ADD]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
define amdgpu_kernel void @v_fneg_add_multi_use_add_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %add = fadd float %a, %b
  %fneg = fneg float %add
  %use1 = fmul float %add, 4.0
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %use1, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_add_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]

; GCN-SAFE: v_sub_f32_e32
; GCN-SAFE: v_xor_b32_e32 [[ADD:v[0-9]+]], 0x80000000,

; GCN-NSZ: v_sub_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]

; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
define amdgpu_kernel void @v_fneg_add_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.a = fneg float %a
  %add = fadd float %fneg.a, %b
  %fneg = fneg float %add
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_add_x_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]

; GCN-SAFE: v_sub_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[ADD]]

; GCN-NSZ: v_sub_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
define amdgpu_kernel void @v_fneg_add_x_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.b = fneg float %b
  %add = fadd float %a, %fneg.b
  %fneg = fneg float %add
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_add_fneg_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]

; GCN-SAFE: v_sub_f32_e64 [[ADD:v[0-9]+]], -[[A]], [[B]]
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[ADD]]

; GCN-NSZ: v_add_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
define amdgpu_kernel void @v_fneg_add_fneg_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.a = fneg float %a
  %fneg.b = fneg float %b
  %add = fadd float %fneg.a, %fneg.b
  %fneg = fneg float %add
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_add_store_use_fneg_x_f32:
; GCN-SAFE-DAG: s_brev_b32 [[SIGNBIT:s[0-9]+]], 1{{$}}
; GCN-DAG: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN-DAG: {{buffer|flat}}_load_dword [[B:v[0-9]+]]

; GCN-SAFE: v_xor_b32_e32 [[NEG_A:v[0-9]+]], [[SIGNBIT]], [[A]]
; GCN-SAFE: v_sub_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
; GCN-SAFE: v_xor_b32_e32 [[NEG_ADD:v[0-9]+]], [[SIGNBIT]], [[ADD]]

; GCN-NSZ-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
; GCN-NSZ-DAG: v_sub_f32_e32 [[NEG_ADD:v[0-9]+]], [[A]], [[B]]
; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_ADD]]
; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
define amdgpu_kernel void @v_fneg_add_store_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.a = fneg float %a
  %add = fadd float %fneg.a, %b
  %fneg = fneg float %add
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %fneg.a, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_add_multi_use_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]

; GCN-SAFE-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
; GCN-SAFE-DAG: v_sub_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
; GCN-SAFE-DAG: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[ADD]]

; GCN-NSZ-DAG: v_sub_f32_e32 [[NEG_ADD:v[0-9]+]], [[A]], [[B]]
; GCN-NSZ-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_ADD]]
; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
define amdgpu_kernel void @v_fneg_add_multi_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float %c) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.a = fneg float %a
  %add = fadd float %fneg.a, %b
  %fneg = fneg float %add
  %use1 = fmul float %fneg.a, %c
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %use1, float addrspace(1)* %out
  ret void
}

; This case used to hit an assertion with -enable-no-signed-zeros-fp-math.
; GCN-LABEL: {{^}}fneg_fadd_0:
; GCN-SAFE-DAG: v_mad_f32 [[A:v[0-9]+]],
; GCN-SAFE-DAG: v_cmp_ngt_f32_e32 {{.*}}, [[A]]
; GCN-SAFE-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, -[[A]]
define amdgpu_ps float @fneg_fadd_0(float inreg %tmp2, float inreg %tmp6, <4 x i32> %arg) local_unnamed_addr #0 {
.entry:
  %tmp7 = fdiv float 1.000000e+00, %tmp6
  %tmp8 = fmul float 0.000000e+00, %tmp7
  %tmp9 = fmul reassoc nnan arcp contract float 0.000000e+00, %tmp8
  %.i188 = fadd float %tmp9, 0.000000e+00
  %tmp10 = fcmp uge float %.i188, %tmp2
  %tmp11 = fneg float %.i188
  %.i092 = select i1 %tmp10, float %tmp2, float %tmp11
  %tmp12 = fcmp ule float %.i092, 0.000000e+00
  %.i198 = select i1 %tmp12, float 0.000000e+00, float 0x7FF8000000000000
  ret float %.i198
}

; This is a workaround: -enable-no-signed-zeros-fp-math does not set the
; unsafe-fp-math function attribute automatically. Merge this with the
; previous test once it does.
; GCN-LABEL: {{^}}fneg_fadd_0_nsz:
; GCN-NSZ-DAG: v_rcp_f32_e32 [[A:v[0-9]+]],
; GCN-NSZ-DAG: v_mov_b32_e32 [[B:v[0-9]+]],
; GCN-NSZ-DAG: v_mov_b32_e32 [[C:v[0-9]+]],
; GCN-NSZ-DAG: v_mul_f32_e32 [[D:v[0-9]+]],
; GCN-NSZ-DAG: v_cmp_nlt_f32_e64 {{.*}}, -[[D]]
define amdgpu_ps float @fneg_fadd_0_nsz(float inreg %tmp2, float inreg %tmp6, <4 x i32> %arg) local_unnamed_addr #2 {
.entry:
  %tmp7 = fdiv float 1.000000e+00, %tmp6
  %tmp8 = fmul float 0.000000e+00, %tmp7
  %tmp9 = fmul reassoc nnan arcp contract float 0.000000e+00, %tmp8
  %.i188 = fadd float %tmp9, 0.000000e+00
  %tmp10 = fcmp uge float %.i188, %tmp2
  %tmp11 = fneg float %.i188
  %.i092 = select i1 %tmp10, float %tmp2, float %tmp11
  %tmp12 = fcmp ule float %.i092, 0.000000e+00
  %.i198 = select i1 %tmp12, float 0.000000e+00, float 0x7FF8000000000000
  ret float %.i198
}

; --------------------------------------------------------------------------------
; fmul tests
; --------------------------------------------------------------------------------

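; Unlike fadd, a fneg can always be folded into an fmul by negating one
; operand: the sign of the product is well defined even for zero results, so
; there is no GCN-SAFE/GCN-NSZ split in these checks.
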
; GCN-LABEL: {{^}}v_fneg_mul_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], [[A]], -[[B]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_mul_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %mul = fmul float %a, %b
  %fneg = fneg float %mul
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_mul_store_use_mul_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_mul_f32_e32 [[MUL:v[0-9]+]], [[A]], [[B]]
; GCN-DAG: v_xor_b32_e32 [[NEG_MUL:v[0-9]+]], 0x80000000, [[MUL]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
define amdgpu_kernel void @v_fneg_mul_store_use_mul_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %mul = fmul float %a, %b
  %fneg = fneg float %mul
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %mul, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_mul_multi_use_mul_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e64 [[MUL0:v[0-9]+]], [[A]], -[[B]]
; GCN-NEXT: v_mul_f32_e32 [[MUL1:v[0-9]+]], -4.0, [[MUL0]]

; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL0]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
define amdgpu_kernel void @v_fneg_mul_multi_use_mul_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %mul = fmul float %a, %b
  %fneg = fneg float %mul
  %use1 = fmul float %mul, 4.0
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %use1, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_mul_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], [[A]], [[B]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
define amdgpu_kernel void @v_fneg_mul_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.a = fneg float %a
  %mul = fmul float %fneg.a, %b
  %fneg = fneg float %mul
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_mul_x_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], [[A]], [[B]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
define amdgpu_kernel void @v_fneg_mul_x_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.b = fneg float %b
  %mul = fmul float %a, %fneg.b
  %fneg = fneg float %mul
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_mul_fneg_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], [[A]], -[[B]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
define amdgpu_kernel void @v_fneg_mul_fneg_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.a = fneg float %a
  %fneg.b = fneg float %b
  %mul = fmul float %fneg.a, %fneg.b
  %fneg = fneg float %mul
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_mul_store_use_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
; GCN-DAG: v_mul_f32_e32 [[NEG_MUL:v[0-9]+]], [[A]], [[B]]

; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
define amdgpu_kernel void @v_fneg_mul_store_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.a = fneg float %a
  %mul = fmul float %fneg.a, %b
  %fneg = fneg float %mul
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %fneg.a, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_mul_multi_use_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_mul_f32_e32 [[NEG_MUL:v[0-9]+]], [[A]], [[B]]
; GCN-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
define amdgpu_kernel void @v_fneg_mul_multi_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float %c) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.a = fneg float %a
  %mul = fmul float %fneg.a, %b
  %fneg = fneg float %mul
  %use1 = fmul float %fneg.a, %c
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %use1, float addrspace(1)* %out
  ret void
}

; --------------------------------------------------------------------------------
; fminnum tests
; --------------------------------------------------------------------------------

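; fneg(minnum(a, b)) folds to maxnum(-a, -b). In IEEE mode (the default for
; kernels) the inputs must first be quieted, which appears below as a
; canonicalizing multiply by 1.0 or -1.0; the amdgpu_ps "no_ieee" variants run
; with IEEE mode off and fold the negation directly into the max.
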
; GCN-LABEL: {{^}}v_fneg_minnum_f32_ieee:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_A:v[0-9]+]], -1.0, [[A]]
; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_B:v[0-9]+]], -1.0, [[B]]
; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], [[NEG_QUIET_A]], [[NEG_QUIET_B]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_minnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %min = call float @llvm.minnum.f32(float %a, float %b)
  %fneg = fneg float %min
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_minnum_f32_no_ieee:
; GCN-NOT: v0
; GCN-NOT: v1
; GCN: v_max_f32_e64 v0, -v0, -v1
; GCN-NEXT: ; return
define amdgpu_ps float @v_fneg_minnum_f32_no_ieee(float %a, float %b) #0 {
  %min = call float @llvm.minnum.f32(float %a, float %b)
  %fneg = fneg float %min
  ret float %fneg
}

; GCN-LABEL: {{^}}v_fneg_self_minnum_f32_ieee:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_A:v[0-9]+]], -1.0, [[A]]
; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], [[NEG_QUIET_A]], [[NEG_QUIET_A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_self_minnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %min = call float @llvm.minnum.f32(float %a, float %a)
  %min.fneg = fsub float -0.0, %min
  store float %min.fneg, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_self_minnum_f32_no_ieee:
; GCN-NOT: v0
; GCN: v_max_f32_e64 v0, -v0, -v0
; GCN-NEXT: ; return
define amdgpu_ps float @v_fneg_self_minnum_f32_no_ieee(float %a) #0 {
  %min = call float @llvm.minnum.f32(float %a, float %a)
  %min.fneg = fsub float -0.0, %min
  ret float %min.fneg
}

; GCN-LABEL: {{^}}v_fneg_posk_minnum_f32_ieee:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]]
; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], -4.0, [[QUIET_NEG_A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_posk_minnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %min = call float @llvm.minnum.f32(float 4.0, float %a)
  %fneg = fneg float %min
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_posk_minnum_f32_no_ieee:
; GCN-NOT: v0
; GCN: v_max_f32_e64 v0, -v0, -4.0
; GCN-NEXT: ; return
define amdgpu_ps float @v_fneg_posk_minnum_f32_no_ieee(float %a) #0 {
  %min = call float @llvm.minnum.f32(float 4.0, float %a)
  %fneg = fneg float %min
  ret float %fneg
}

; GCN-LABEL: {{^}}v_fneg_negk_minnum_f32_ieee:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]]
; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], 4.0, [[QUIET_NEG_A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_negk_minnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %min = call float @llvm.minnum.f32(float -4.0, float %a)
  %fneg = fneg float %min
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_negk_minnum_f32_no_ieee:
; GCN-NOT: v0
; GCN: v_max_f32_e64 v0, -v0, 4.0
; GCN-NEXT: ; return
define amdgpu_ps float @v_fneg_negk_minnum_f32_no_ieee(float %a) #0 {
  %min = call float @llvm.minnum.f32(float -4.0, float %a)
  %fneg = fneg float %min
  ret float %fneg
}

; GCN-LABEL: {{^}}v_fneg_0_minnum_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], 0, [[A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_0_minnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %min = call float @llvm.minnum.f32(float 0.0, float %a)
  %fneg = fneg float %min
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_neg0_minnum_f32_ieee:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]]
; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], 0, [[QUIET_NEG_A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_neg0_minnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %min = call float @llvm.minnum.f32(float -0.0, float %a)
  %fneg = fneg float %min
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

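; 1/(2*pi) is an inline constant on VI but not on SI. For the positive
; constant, VI keeps the min with the inline constant and negates afterwards,
; while SI folds the negated value in as a literal; for the negated constant,
; both targets can fold directly into the max.
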
; GCN-LABEL: {{^}}v_fneg_inv2pi_minnum_f32:
; GCN-DAG: {{buffer|flat}}_load_dword [[A:v[0-9]+]]

; SI-DAG: v_mul_f32_e32 [[QUIET_NEG:v[0-9]+]], -1.0, [[A]]
; SI: v_max_f32_e32 [[RESULT:v[0-9]+]], 0xbe22f983, [[QUIET_NEG]]

; VI: v_mul_f32_e32 [[QUIET:v[0-9]+]], 1.0, [[A]]
; VI: v_min_f32_e32 [[MAX:v[0-9]+]], 0.15915494, [[QUIET]]
; VI: v_xor_b32_e32 [[RESULT:v[0-9]+]], 0x80000000, [[MAX]]

; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_inv2pi_minnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %min = call float @llvm.minnum.f32(float 0x3FC45F3060000000, float %a)
  %fneg = fneg float %min
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_neg_inv2pi_minnum_f32:
; GCN-DAG: {{buffer|flat}}_load_dword [[A:v[0-9]+]]

; SI: v_mul_f32_e32 [[NEG_QUIET:v[0-9]+]], -1.0, [[A]]
; SI: v_max_f32_e32 [[RESULT:v[0-9]+]], 0x3e22f983, [[NEG_QUIET]]

; VI: v_mul_f32_e32 [[NEG_QUIET:v[0-9]+]], -1.0, [[A]]
; VI: v_max_f32_e32 [[RESULT:v[0-9]+]], 0.15915494, [[NEG_QUIET]]

; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_neg_inv2pi_minnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %min = call float @llvm.minnum.f32(float 0xBFC45F3060000000, float %a)
  %fneg = fneg float %min
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_inv2pi_minnum_f16:
; GCN-DAG: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]

; SI: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], -[[A]]
; SI: v_max_f32_e32 [[MAX:v[0-9]+]], 0xbe230000, [[CVT]]
; SI: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[MAX]]

; VI: v_max_f16_e32 [[QUIET:v[0-9]+]], [[A]], [[A]]
; VI: v_min_f16_e32 [[MAX:v[0-9]+]], 0.15915494, [[QUIET]]
; VI: v_xor_b32_e32 [[RESULT:v[0-9]+]], 0x8000, [[MAX]]

; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_inv2pi_minnum_f16(half addrspace(1)* %out, half addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds half, half addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
  %a = load volatile half, half addrspace(1)* %a.gep
  %min = call half @llvm.minnum.f16(half 0xH3118, half %a)
  %fneg = fsub half -0.000000e+00, %min
  store half %fneg, half addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_neg_inv2pi_minnum_f16:
; GCN-DAG: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]

; SI: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], -[[A]]
; SI: v_max_f32_e32 [[MAX:v[0-9]+]], 0x3e230000, [[CVT]]
; SI: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[MAX]]

; VI: v_max_f16_e64 [[NEG_QUIET:v[0-9]+]], -[[A]], -[[A]]
; VI: v_max_f16_e32 [[RESULT:v[0-9]+]], 0.15915494, [[NEG_QUIET]]

; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_neg_inv2pi_minnum_f16(half addrspace(1)* %out, half addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds half, half addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
  %a = load volatile half, half addrspace(1)* %a.gep
  %min = call half @llvm.minnum.f16(half 0xHB118, half %a)
  %fneg = fsub half -0.000000e+00, %min
  store half %fneg, half addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_inv2pi_minnum_f64:
; GCN-DAG: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]

; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 0xbfc45f30
; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], 0x6dc9c882
; SI-DAG: v_max_f64 [[NEG_QUIET:v\[[0-9]+:[0-9]+\]]], -[[A]], -[[A]]
; SI: v_max_f64 v{{\[}}[[RESULT_LO:[0-9]+]]:[[RESULT_HI:[0-9]+]]{{\]}}, [[NEG_QUIET]], s{{\[}}[[K_LO]]:[[K_HI]]{{\]}}

; VI: v_min_f64 v{{\[}}[[RESULT_LO:[0-9]+]]:[[RESULT_HI:[0-9]+]]{{\]}}, [[A]], 0.15915494
; VI: v_xor_b32_e32 v[[RESULT_HI]], 0x80000000, v[[RESULT_HI]]

; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
define amdgpu_kernel void @v_fneg_inv2pi_minnum_f64(double addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
  %a = load volatile double, double addrspace(1)* %a.gep
  %min = call double @llvm.minnum.f64(double 0x3fc45f306dc9c882, double %a)
  %fneg = fsub double -0.000000e+00, %min
  store double %fneg, double addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_neg_inv2pi_minnum_f64:
; GCN-DAG: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]

; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 0x3fc45f30
; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], 0x6dc9c882
; SI-DAG: v_max_f64 [[NEG_QUIET:v\[[0-9]+:[0-9]+\]]], -[[A]], -[[A]]
; SI: v_max_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[NEG_QUIET]], s{{\[}}[[K_LO]]:[[K_HI]]{{\]}}

; VI: v_max_f64 [[NEG_QUIET:v\[[0-9]+:[0-9]+\]]], -[[A]], -[[A]]
; VI: v_max_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[NEG_QUIET]], 0.15915494

; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_neg_inv2pi_minnum_f64(double addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
  %a = load volatile double, double addrspace(1)* %a.gep
  %min = call double @llvm.minnum.f64(double 0xbfc45f306dc9c882, double %a)
  %fneg = fsub double -0.000000e+00, %min
  store double %fneg, double addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_neg0_minnum_f32_no_ieee:
; GCN-NOT: v0
; GCN: v_max_f32_e64 v0, -v0, 0{{$}}
; GCN-NEXT: ; return
define amdgpu_ps float @v_fneg_neg0_minnum_f32_no_ieee(float %a) #0 {
  %min = call float @llvm.minnum.f32(float -0.0, float %a)
  %fneg = fneg float %min
  ret float %fneg
}

; GCN-LABEL: {{^}}v_fneg_0_minnum_foldable_use_f32_ieee:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e32 [[QUIET_A:v[0-9]+]], 1.0, [[A]]
; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], 0, [[QUIET_A]]
; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MIN]], [[B]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_0_minnum_foldable_use_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %min = call float @llvm.minnum.f32(float 0.0, float %a)
  %fneg = fneg float %min
  %mul = fmul float %fneg, %b
  store float %mul, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_inv2pi_minnum_foldable_use_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]

; SI: v_mul_f32_e32 [[QUIET_NEG:v[0-9]+]], -1.0, [[A]]

; SI: v_max_f32_e32 [[MIN:v[0-9]+]], 0xbe22f983, [[QUIET_NEG]]
; SI: v_mul_f32_e32 [[RESULT:v[0-9]+]], [[MIN]], [[B]]

; VI: v_mul_f32_e32 [[QUIET:v[0-9]+]], 1.0, [[A]]
; VI: v_min_f32_e32 [[MIN:v[0-9]+]], 0.15915494, [[QUIET]]
; VI: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MIN]], [[B]]

; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_inv2pi_minnum_foldable_use_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %min = call float @llvm.minnum.f32(float 0x3FC45F3060000000, float %a)
  %fneg = fneg float %min
  %mul = fmul float %fneg, %b
  store float %mul, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_0_minnum_foldable_use_f32_no_ieee:
; GCN-NOT: v0
; GCN-NOT: v1
; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], 0, v0
; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MIN]], v1
; GCN-NEXT: ; return
define amdgpu_ps float @v_fneg_0_minnum_foldable_use_f32_no_ieee(float %a, float %b) #0 {
  %min = call float @llvm.minnum.f32(float 0.0, float %a)
  %fneg = fneg float %min
  %mul = fmul float %fneg, %b
  ret float %mul
}

; GCN-LABEL: {{^}}v_fneg_minnum_multi_use_minnum_f32_ieee:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_A:v[0-9]+]], -1.0, [[A]]
; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_B:v[0-9]+]], -1.0, [[B]]
; GCN: v_max_f32_e32 [[MAX0:v[0-9]+]], [[NEG_QUIET_A]], [[NEG_QUIET_B]]
; GCN-NEXT: v_mul_f32_e32 [[MUL1:v[0-9]+]], -4.0, [[MAX0]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MAX0]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
define amdgpu_kernel void @v_fneg_minnum_multi_use_minnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %min = call float @llvm.minnum.f32(float %a, float %b)
  %fneg = fneg float %min
  %use1 = fmul float %min, 4.0
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %use1, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_minnum_multi_use_minnum_f32_no_ieee:
; GCN-NOT: v0
; GCN-NOT: v1
; GCN: v_max_f32_e64 v0, -v0, -v1
; GCN-NEXT: v_mul_f32_e32 v1, -4.0, v0
; GCN-NEXT: ; return
define amdgpu_ps <2 x float> @v_fneg_minnum_multi_use_minnum_f32_no_ieee(float %a, float %b) #0 {
  %min = call float @llvm.minnum.f32(float %a, float %b)
  %fneg = fneg float %min
  %use1 = fmul float %min, 4.0
  %ins0 = insertelement <2 x float> undef, float %fneg, i32 0
  %ins1 = insertelement <2 x float> %ins0, float %use1, i32 1
  ret <2 x float> %ins1
}

; --------------------------------------------------------------------------------
; fmaxnum tests
; --------------------------------------------------------------------------------

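; These mirror the fminnum tests: fneg(maxnum(a, b)) folds to minnum(-a, -b),
; with the same IEEE-mode quieting requirements.
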
; GCN-LABEL: {{^}}v_fneg_maxnum_f32_ieee:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_A:v[0-9]+]], -1.0, [[A]]
; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_B:v[0-9]+]], -1.0, [[B]]
; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], [[NEG_QUIET_A]], [[NEG_QUIET_B]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_maxnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %max = call float @llvm.maxnum.f32(float %a, float %b)
  %fneg = fneg float %max
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_maxnum_f32_no_ieee:
; GCN-NOT: v0
; GCN-NOT: v1
; GCN: v_min_f32_e64 v0, -v0, -v1
; GCN-NEXT: ; return
define amdgpu_ps float @v_fneg_maxnum_f32_no_ieee(float %a, float %b) #0 {
  %max = call float @llvm.maxnum.f32(float %a, float %b)
  %fneg = fneg float %max
  ret float %fneg
}

; GCN-LABEL: {{^}}v_fneg_self_maxnum_f32_ieee:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_A:v[0-9]+]], -1.0, [[A]]
; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], [[NEG_QUIET_A]], [[NEG_QUIET_A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_self_maxnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %max = call float @llvm.maxnum.f32(float %a, float %a)
  %max.fneg = fsub float -0.0, %max
  store float %max.fneg, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_self_maxnum_f32_no_ieee:
; GCN-NOT: v0
; GCN: v_min_f32_e64 v0, -v0, -v0
; GCN-NEXT: ; return
define amdgpu_ps float @v_fneg_self_maxnum_f32_no_ieee(float %a) #0 {
  %max = call float @llvm.maxnum.f32(float %a, float %a)
  %max.fneg = fsub float -0.0, %max
  ret float %max.fneg
}

; GCN-LABEL: {{^}}v_fneg_posk_maxnum_f32_ieee:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]]
; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], -4.0, [[QUIET_NEG_A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_posk_maxnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %max = call float @llvm.maxnum.f32(float 4.0, float %a)
  %fneg = fneg float %max
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_posk_maxnum_f32_no_ieee:
; GCN-NOT: v0
; GCN: v_min_f32_e64 v0, -v0, -4.0
; GCN-NEXT: ; return
define amdgpu_ps float @v_fneg_posk_maxnum_f32_no_ieee(float %a) #0 {
  %max = call float @llvm.maxnum.f32(float 4.0, float %a)
  %fneg = fneg float %max
  ret float %fneg
}

; GCN-LABEL: {{^}}v_fneg_negk_maxnum_f32_ieee:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]]
; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], 4.0, [[QUIET_NEG_A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_negk_maxnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %max = call float @llvm.maxnum.f32(float -4.0, float %a)
  %fneg = fneg float %max
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_negk_maxnum_f32_no_ieee:
; GCN-NOT: v0
; GCN: v_min_f32_e64 v0, -v0, 4.0
; GCN-NEXT: ; return
define amdgpu_ps float @v_fneg_negk_maxnum_f32_no_ieee(float %a) #0 {
  %max = call float @llvm.maxnum.f32(float -4.0, float %a)
  %fneg = fneg float %max
  ret float %fneg
}

; GCN-LABEL: {{^}}v_fneg_0_maxnum_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], 0, [[A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_0_maxnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %max = call float @llvm.maxnum.f32(float 0.0, float %a)
  %fneg = fneg float %max
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_neg0_maxnum_f32_ieee:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]]
; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], 0, [[QUIET_NEG_A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_neg0_maxnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %max = call float @llvm.maxnum.f32(float -0.0, float %a)
  %fneg = fneg float %max
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_neg0_maxnum_f32_no_ieee:
; GCN-NOT: v0
; GCN: v_min_f32_e64 v0, -v0, 0{{$}}
; GCN-NEXT: ; return
define amdgpu_ps float @v_fneg_neg0_maxnum_f32_no_ieee(float %a) #0 {
  %max = call float @llvm.maxnum.f32(float -0.0, float %a)
  %fneg = fneg float %max
  ret float %fneg
}

; GCN-LABEL: {{^}}v_fneg_0_maxnum_foldable_use_f32_ieee:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e32 [[QUIET_A:v[0-9]+]], 1.0, [[A]]
; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], 0, [[QUIET_A]]
; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MAX]], [[B]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_0_maxnum_foldable_use_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %max = call float @llvm.maxnum.f32(float 0.0, float %a)
  %fneg = fneg float %max
  %mul = fmul float %fneg, %b
  store float %mul, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_0_maxnum_foldable_use_f32_no_ieee:
; GCN-NOT: v0
; GCN-NOT: v1
; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], 0, v0
; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MAX]], v1
; GCN-NEXT: ; return
define amdgpu_ps float @v_fneg_0_maxnum_foldable_use_f32_no_ieee(float %a, float %b) #0 {
  %max = call float @llvm.maxnum.f32(float 0.0, float %a)
  %fneg = fneg float %max
  %mul = fmul float %fneg, %b
  ret float %mul
}

; GCN-LABEL: {{^}}v_fneg_maxnum_multi_use_maxnum_f32_ieee:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_A:v[0-9]+]], -1.0, [[A]]
; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_B:v[0-9]+]], -1.0, [[B]]
; GCN: v_min_f32_e32 [[MAX0:v[0-9]+]], [[NEG_QUIET_A]], [[NEG_QUIET_B]]
; GCN-NEXT: v_mul_f32_e32 [[MUL1:v[0-9]+]], -4.0, [[MAX0]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MAX0]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
define amdgpu_kernel void @v_fneg_maxnum_multi_use_maxnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %max = call float @llvm.maxnum.f32(float %a, float %b)
  %fneg = fneg float %max
  %use1 = fmul float %max, 4.0
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %use1, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_maxnum_multi_use_maxnum_f32_no_ieee:
; GCN-NOT: v0
; GCN-NOT: v1
; GCN: v_min_f32_e64 v0, -v0, -v1
; GCN-NEXT: v_mul_f32_e32 v1, -4.0, v0
; GCN-NEXT: ; return
define amdgpu_ps <2 x float> @v_fneg_maxnum_multi_use_maxnum_f32_no_ieee(float %a, float %b) #0 {
  %max = call float @llvm.maxnum.f32(float %a, float %b)
  %fneg = fneg float %max
  %use1 = fmul float %max, 4.0
  %ins0 = insertelement <2 x float> undef, float %fneg, i32 0
  %ins1 = insertelement <2 x float> %ins0, float %use1, i32 1
  ret <2 x float> %ins1
}

; --------------------------------------------------------------------------------
; fma tests
; --------------------------------------------------------------------------------

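; fneg(fma(a, b, c)) folds to fma(a, -b, -c), which is again only valid when
; signed zeros can be ignored; the safe lowering keeps the fma and negates the
; result with an xor of the sign bit.
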
; GCN-LABEL: {{^}}v_fneg_fma_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]

; GCN-SAFE: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[C]]
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[RESULT]]

; GCN-NSZ: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], -[[B]], -[[C]]
; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_fma_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %fma = call float @llvm.fma.f32(float %a, float %b, float %c)
  %fneg = fneg float %fma
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fma_store_use_fma_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
; GCN-DAG: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], [[C]]
; GCN-DAG: v_xor_b32_e32 [[NEG_FMA:v[0-9]+]], 0x80000000, [[FMA]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_FMA]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
define amdgpu_kernel void @v_fneg_fma_store_use_fma_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %fma = call float @llvm.fma.f32(float %a, float %b, float %c)
  %fneg = fneg float %fma
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %fma, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fma_multi_use_fma_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]

; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], [[C]]
; GCN-SAFE: v_xor_b32_e32 [[NEG_FMA:v[0-9]+]], 0x80000000, [[FMA]]
; GCN-SAFE: v_mul_f32_e32 [[MUL:v[0-9]+]], 4.0, [[FMA]]

; GCN-NSZ: v_fma_f32 [[NEG_FMA:v[0-9]+]], [[A]], -[[B]], -[[C]]
; GCN-NSZ-NEXT: v_mul_f32_e32 [[MUL:v[0-9]+]], -4.0, [[NEG_FMA]]

; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_FMA]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
define amdgpu_kernel void @v_fneg_fma_multi_use_fma_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %fma = call float @llvm.fma.f32(float %a, float %b, float %c)
  %fneg = fneg float %fma
  %use1 = fmul float %fma, 4.0
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %use1, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fma_fneg_x_y_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]

; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], -[[A]], [[B]], [[C]]
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]]

; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], -[[C]]
; GCN-NSZ-NOT: [[FMA]]
; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
define amdgpu_kernel void @v_fneg_fma_fneg_x_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %fneg.a = fneg float %a
  %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
  %fneg = fneg float %fma
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fma_x_fneg_y_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]

; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], [[A]], -[[B]], [[C]]
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]]

; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], -[[C]]
; GCN-NSZ-NOT: [[FMA]]
; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
define amdgpu_kernel void @v_fneg_fma_x_fneg_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
1207  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
1208  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
1209  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
1210  %a = load volatile float, float addrspace(1)* %a.gep
1211  %b = load volatile float, float addrspace(1)* %b.gep
1212  %c = load volatile float, float addrspace(1)* %c.gep
1213  %fneg.b = fneg float %b
1214  %fma = call float @llvm.fma.f32(float %a, float %fneg.b, float %c)
1215  %fneg = fneg float %fma
1216  store volatile float %fneg, float addrspace(1)* %out
1217  ret void
1218}
1219
1220; GCN-LABEL: {{^}}v_fneg_fma_fneg_fneg_y_f32:
1221; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1222; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1223; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
1224
1225; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], [[C]]
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]]

; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], -[[B]], -[[C]]
; GCN-NSZ-NOT: [[FMA]]
; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
define amdgpu_kernel void @v_fneg_fma_fneg_fneg_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %fneg.a = fneg float %a
  %fneg.b = fneg float %b
  %fma = call float @llvm.fma.f32(float %fneg.a, float %fneg.b, float %c)
  %fneg = fneg float %fma
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fma_fneg_x_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]

; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], -[[A]], [[B]], -[[C]]
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]]

; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], [[C]]
; GCN-NSZ-NOT: [[FMA]]
; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
define amdgpu_kernel void @v_fneg_fma_fneg_x_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %fneg.a = fneg float %a
  %fneg.c = fneg float %c
  %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %fneg.c)
  %fneg = fneg float %fma
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fma_x_y_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]

; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], -[[C]]
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]]

; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], -[[B]], [[C]]
; GCN-NSZ-NOT: [[FMA]]
; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
define amdgpu_kernel void @v_fneg_fma_x_y_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %fneg.c = fneg float %c
  %fma = call float @llvm.fma.f32(float %a, float %b, float %fneg.c)
  %fneg = fneg float %fma
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fma_store_use_fneg_x_y_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]

; GCN-SAFE: v_xor_b32
; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], -[[A]],
; GCN-SAFE: v_xor_b32

; GCN-NSZ-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
; GCN-NSZ-DAG: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], -[[C]]

; GCN-NSZ-NOT: [[FMA]]
; GCN-NSZ-NOT: [[NEG_A]]
; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
; GCN-NSZ-NOT: [[NEG_A]]
; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
define amdgpu_kernel void @v_fneg_fma_store_use_fneg_x_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %fneg.a = fneg float %a
  %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
  %fneg = fneg float %fma
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %fneg.a, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fma_multi_use_fneg_x_y_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]

; GCN-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
; GCN-SAFE-DAG: v_fma_f32 [[FMA:v[0-9]+]]
; GCN-SAFE-DAG: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]]

; GCN-NSZ-DAG: v_fma_f32 [[NEG_FMA:v[0-9]+]], [[A]], [[B]], -[[C]]
; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_FMA]]
; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
define amdgpu_kernel void @v_fneg_fma_multi_use_fneg_x_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, float %d) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %fneg.a = fneg float %a
  %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
  %fneg = fneg float %fma
  %use1 = fmul float %fneg.a, %d
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %use1, float addrspace(1)* %out
  ret void
}

; --------------------------------------------------------------------------------
; fmad tests
; --------------------------------------------------------------------------------
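; llvm.fmuladd lowers to v_mac/v_mad here; the same -(a*b + c) = a*(-b) + (-c)
; rewrite is expected under nsz, while the safe path negates the result with
; v_xor_b32.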

; GCN-LABEL: {{^}}v_fneg_fmad_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]

; GCN-SAFE: v_mac_f32_e32 [[C]], [[A]], [[B]]
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[C]]

; GCN-NSZ: v_mad_f32 [[RESULT:v[0-9]+]], [[A]], -[[B]], -[[C]]
; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_fmad_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %fma = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
  %fneg = fneg float %fma
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fmad_v4f32:

; GCN-NSZ: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}, -v{{[0-9]+}}
; GCN-NSZ: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}, -v{{[0-9]+}}
; GCN-NSZ: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}, -v{{[0-9]+}}
; GCN-NSZ: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}, -v{{[0-9]+}}
define amdgpu_kernel void @v_fneg_fmad_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %a.ptr, <4 x float> addrspace(1)* %b.ptr, <4 x float> addrspace(1)* %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %out, i64 %tid.ext
  %a = load volatile <4 x float>, <4 x float> addrspace(1)* %a.gep
  %b = load volatile <4 x float>, <4 x float> addrspace(1)* %b.gep
  %c = load volatile <4 x float>, <4 x float> addrspace(1)* %c.gep
  %fma = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c)
  %fneg = fneg <4 x float> %fma
  store <4 x float> %fneg, <4 x float> addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fmad_multi_use_fmad_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]

; GCN-SAFE: v_mac_f32_e32 [[C]], [[A]], [[B]]
; GCN-SAFE: v_xor_b32_e32 [[NEG_MAD:v[0-9]+]], 0x80000000, [[C]]
; GCN-SAFE-NEXT: v_mul_f32_e32 [[MUL:v[0-9]+]], 4.0, [[C]]

; GCN-NSZ: v_mad_f32 [[NEG_MAD:v[0-9]+]], [[A]], -[[B]], -[[C]]
; GCN-NSZ-NEXT: v_mul_f32_e32 [[MUL:v[0-9]+]], -4.0, [[NEG_MAD]]

; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MAD]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
define amdgpu_kernel void @v_fneg_fmad_multi_use_fmad_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %fma = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
  %fneg = fneg float %fma
  %use1 = fmul float %fma, 4.0
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %use1, float addrspace(1)* %out
  ret void
}

; --------------------------------------------------------------------------------
; fp_extend tests
; --------------------------------------------------------------------------------
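; fpext is exact and sign-preserving, so fneg(fpext(x)) = fpext(-x); the
; negation is expected to fold into the v_cvt source modifier, and a double
; negation through the extend should cancel.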

; GCN-LABEL: {{^}}v_fneg_fp_extend_f32_to_f64:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_cvt_f64_f32_e64 [[RESULT:v\[[0-9]+:[0-9]+\]]], -[[A]]
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_fp_extend_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %fpext = fpext float %a to double
  %fneg = fsub double -0.000000e+00, %fpext
  store double %fneg, double addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fp_extend_fneg_f32_to_f64:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]]
; GCN: {{buffer|flat}}_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_fp_extend_fneg_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %fneg.a = fneg float %a
  %fpext = fpext float %fneg.a to double
  %fneg = fsub double -0.000000e+00, %fpext
  store double %fneg, double addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fp_extend_store_use_fneg_f32_to_f64:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN-DAG: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]]
; GCN-DAG: v_xor_b32_e32 [[FNEG_A:v[0-9]+]], 0x80000000, [[A]]
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FNEG_A]]
define amdgpu_kernel void @v_fneg_fp_extend_store_use_fneg_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %fneg.a = fneg float %a
  %fpext = fpext float %fneg.a to double
  %fneg = fsub double -0.000000e+00, %fpext
  store volatile double %fneg, double addrspace(1)* %out.gep
  store volatile float %fneg.a, float addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}v_fneg_multi_use_fp_extend_fneg_f32_to_f64:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN-DAG: v_cvt_f64_f32_e32 v{{\[}}[[CVT_LO:[0-9]+]]:[[CVT_HI:[0-9]+]]{{\]}}, [[A]]
; GCN-DAG: v_xor_b32_e32 v[[FNEG_A:[0-9]+]], 0x80000000, v[[CVT_HI]]
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+}}:[[FNEG_A]]{{\]}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[CVT_LO]]:[[CVT_HI]]{{\]}}
define amdgpu_kernel void @v_fneg_multi_use_fp_extend_fneg_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %fpext = fpext float %a to double
  %fneg = fsub double -0.000000e+00, %fpext
  store volatile double %fneg, double addrspace(1)* %out.gep
  store volatile double %fpext, double addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}v_fneg_multi_foldable_use_fp_extend_fneg_f32_to_f64:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN-DAG: v_cvt_f64_f32_e32 v{{\[}}[[CVT_LO:[0-9]+]]:[[CVT_HI:[0-9]+]]{{\]}}, [[A]]
; GCN-DAG: v_xor_b32_e32 v[[FNEG_A:[0-9]+]], 0x80000000, v[[CVT_HI]]
; GCN-DAG: v_mul_f64 [[MUL:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[CVT_LO]]:[[CVT_HI]]{{\]}}, 4.0
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+}}:[[FNEG_A]]{{\]}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
define amdgpu_kernel void @v_fneg_multi_foldable_use_fp_extend_fneg_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %fpext = fpext float %a to double
  %fneg = fsub double -0.000000e+00, %fpext
  %mul = fmul double %fpext, 4.0
  store volatile double %fneg, double addrspace(1)* %out.gep
  store volatile double %mul, double addrspace(1)* %out.gep
  ret void
}

; FIXME: Source modifiers not folded for f16->f32
; GCN-LABEL: {{^}}v_fneg_multi_use_fp_extend_fneg_f16_to_f32:
define amdgpu_kernel void @v_fneg_multi_use_fp_extend_fneg_f16_to_f32(float addrspace(1)* %out, half addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds half, half addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile half, half addrspace(1)* %a.gep
  %fpext = fpext half %a to float
  %fneg = fneg float %fpext
  store volatile float %fneg, float addrspace(1)* %out.gep
  store volatile float %fpext, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f32:
define amdgpu_kernel void @v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f32(float addrspace(1)* %out, half addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds half, half addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile half, half addrspace(1)* %a.gep
  %fpext = fpext half %a to float
  %fneg = fneg float %fpext
  %mul = fmul float %fpext, 4.0
  store volatile float %fneg, float addrspace(1)* %out.gep
  store volatile float %mul, float addrspace(1)* %out.gep
  ret void
}

; --------------------------------------------------------------------------------
; fp_round tests
; --------------------------------------------------------------------------------
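; fptrunc also commutes with negation: fneg(fptrunc(x)) = fptrunc(-x), so the
; fneg should fold into the v_cvt source modifier or cancel against an inner
; fneg.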

; GCN-LABEL: {{^}}v_fneg_fp_round_f64_to_f32:
; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
; GCN: v_cvt_f32_f64_e64 [[RESULT:v[0-9]+]], -[[A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_fp_round_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile double, double addrspace(1)* %a.gep
  %fpround = fptrunc double %a to float
  %fneg = fneg float %fpround
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fp_round_fneg_f64_to_f32:
; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
; GCN: v_cvt_f32_f64_e32 [[RESULT:v[0-9]+]], [[A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_fp_round_fneg_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile double, double addrspace(1)* %a.gep
  %fneg.a = fsub double -0.000000e+00, %a
  %fpround = fptrunc double %fneg.a to float
  %fneg = fneg float %fpround
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fp_round_store_use_fneg_f64_to_f32:
; GCN: {{buffer|flat}}_load_dwordx2 v{{\[}}[[A_LO:[0-9]+]]:[[A_HI:[0-9]+]]{{\]}}
; GCN-DAG: v_cvt_f32_f64_e32 [[RESULT:v[0-9]+]], v{{\[}}[[A_LO]]:[[A_HI]]{{\]}}
; GCN-DAG: v_xor_b32_e32 v[[NEG_A_HI:[0-9]+]], 0x80000000, v[[A_HI]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[A_LO]]:[[NEG_A_HI]]{{\]}}
define amdgpu_kernel void @v_fneg_fp_round_store_use_fneg_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile double, double addrspace(1)* %a.gep
  %fneg.a = fsub double -0.000000e+00, %a
  %fpround = fptrunc double %fneg.a to float
  %fneg = fneg float %fpround
  store volatile float %fneg, float addrspace(1)* %out.gep
  store volatile double %fneg.a, double addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fp_round_multi_use_fneg_f64_to_f32:
; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
; GCN-DAG: v_cvt_f32_f64_e32 [[RESULT:v[0-9]+]], [[A]]
; GCN-DAG: v_mul_f64 [[USE1:v\[[0-9]+:[0-9]+\]]], -[[A]], s{{\[}}

; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[USE1]]
define amdgpu_kernel void @v_fneg_fp_round_multi_use_fneg_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr, double %c) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile double, double addrspace(1)* %a.gep
  %fneg.a = fsub double -0.000000e+00, %a
  %fpround = fptrunc double %fneg.a to float
  %fneg = fneg float %fpround
  %use1 = fmul double %fneg.a, %c
  store volatile float %fneg, float addrspace(1)* %out.gep
  store volatile double %use1, double addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fp_round_f32_to_f16:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_cvt_f16_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_fp_round_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %fpround = fptrunc float %a to half
  %fneg = fsub half -0.000000e+00, %fpround
  store half %fneg, half addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fp_round_fneg_f32_to_f16:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[A]]
; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_fp_round_fneg_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %fneg.a = fneg float %a
  %fpround = fptrunc float %fneg.a to half
  %fneg = fsub half -0.000000e+00, %fpround
  store half %fneg, half addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_multi_use_fp_round_fneg_f64_to_f32:
; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
; GCN-DAG: v_cvt_f32_f64_e32 [[CVT:v[0-9]+]], [[A]]
; GCN-DAG: v_xor_b32_e32 [[NEG:v[0-9]+]], 0x80000000, [[CVT]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[CVT]]
define amdgpu_kernel void @v_fneg_multi_use_fp_round_fneg_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile double, double addrspace(1)* %a.gep
  %fpround = fptrunc double %a to float
  %fneg = fneg float %fpround
  store volatile float %fneg, float addrspace(1)* %out.gep
  store volatile float %fpround, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fp_round_store_use_fneg_f32_to_f16:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN-DAG: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[A]]
; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
define amdgpu_kernel void @v_fneg_fp_round_store_use_fneg_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %fneg.a = fneg float %a
  %fpround = fptrunc float %fneg.a to half
  %fneg = fsub half -0.000000e+00, %fpround
  store volatile half %fneg, half addrspace(1)* %out.gep
  store volatile float %fneg.a, float addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}v_fneg_fp_round_multi_use_fneg_f32_to_f16:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN-DAG: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[A]]
; GCN-DAG: v_mul_f32_e64 [[USE1:v[0-9]+]], -[[A]], s
; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[USE1]]
define amdgpu_kernel void @v_fneg_fp_round_multi_use_fneg_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %a.ptr, float %c) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %fneg.a = fneg float %a
  %fpround = fptrunc float %fneg.a to half
  %fneg = fsub half -0.000000e+00, %fpround
  %use1 = fmul float %fneg.a, %c
  store volatile half %fneg, half addrspace(1)* %out.gep
  store volatile float %use1, float addrspace(1)* undef
  ret void
}

; --------------------------------------------------------------------------------
; rcp tests
; --------------------------------------------------------------------------------
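; 1/(-x) = -(1/x), so fneg(rcp(x)) is expected to become rcp(-x) via a source
; modifier on v_rcp_f32.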

; GCN-LABEL: {{^}}v_fneg_rcp_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_rcp_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_rcp_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %rcp = call float @llvm.amdgcn.rcp.f32(float %a)
  %fneg = fneg float %rcp
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_rcp_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_rcp_f32_e32 [[RESULT:v[0-9]+]], [[A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_rcp_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %fneg.a = fneg float %a
  %rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a)
  %fneg = fneg float %rcp
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_rcp_store_use_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN-DAG: v_rcp_f32_e32 [[RESULT:v[0-9]+]], [[A]]
; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
define amdgpu_kernel void @v_fneg_rcp_store_use_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %fneg.a = fneg float %a
  %rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a)
  %fneg = fneg float %rcp
  store volatile float %fneg, float addrspace(1)* %out.gep
  store volatile float %fneg.a, float addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}v_fneg_rcp_multi_use_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN-DAG: v_rcp_f32_e32 [[RESULT:v[0-9]+]], [[A]]
; GCN-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
define amdgpu_kernel void @v_fneg_rcp_multi_use_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float %c) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %fneg.a = fneg float %a
  %rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a)
  %fneg = fneg float %rcp
  %use1 = fmul float %fneg.a, %c
  store volatile float %fneg, float addrspace(1)* %out.gep
  store volatile float %use1, float addrspace(1)* undef
  ret void
}

; --------------------------------------------------------------------------------
; fmul_legacy tests
; --------------------------------------------------------------------------------
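; -(a*b) = a*(-b) for the legacy multiply as well, so the fneg is expected to
; fold into one multiplicand's source modifier, and negations on both operands
; should cancel.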

; GCN-LABEL: {{^}}v_fneg_mul_legacy_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_legacy_f32_e64 [[RESULT:v[0-9]+]], [[A]], -[[B]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_mul_legacy_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
  %fneg = fneg float %mul
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_mul_legacy_store_use_mul_legacy_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_mul_legacy_f32_e32 [[MUL:v[0-9]+]], [[A]], [[B]]
; GCN-DAG: v_xor_b32_e32 [[NEG_MUL_LEGACY:v[0-9]+]], 0x80000000, [[MUL]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL_LEGACY]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
define amdgpu_kernel void @v_fneg_mul_legacy_store_use_mul_legacy_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
  %fneg = fneg float %mul
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %mul, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_mul_legacy_multi_use_mul_legacy_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_legacy_f32_e64 [[NEG_MUL:v[0-9]+]], [[A]], -[[B]]
; GCN-NEXT: v_mul_legacy_f32_e64 [[USE1:v[0-9]+]], -[[NEG_MUL]], 4.0
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[USE1]]
define amdgpu_kernel void @v_fneg_mul_legacy_multi_use_mul_legacy_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
  %fneg = fneg float %mul
  %use1 = call float @llvm.amdgcn.fmul.legacy(float %mul, float 4.0)
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %use1, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_mul_legacy_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_legacy_f32_e32 [[MUL:v[0-9]+]], [[A]], [[B]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
define amdgpu_kernel void @v_fneg_mul_legacy_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.a = fneg float %a
  %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b)
  %fneg = fneg float %mul
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_mul_legacy_x_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_legacy_f32_e32 [[MUL:v[0-9]+]], [[A]], [[B]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
define amdgpu_kernel void @v_fneg_mul_legacy_x_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.b = fneg float %b
  %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %fneg.b)
  %fneg = fneg float %mul
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_mul_legacy_fneg_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_legacy_f32_e64 [[MUL:v[0-9]+]], [[A]], -[[B]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
define amdgpu_kernel void @v_fneg_mul_legacy_fneg_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.a = fneg float %a
  %fneg.b = fneg float %b
  %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %fneg.b)
  %fneg = fneg float %mul
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_mul_legacy_store_use_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
; GCN-DAG: v_mul_legacy_f32_e32 [[NEG_MUL_LEGACY:v[0-9]+]], [[A]], [[B]]
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL_LEGACY]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
define amdgpu_kernel void @v_fneg_mul_legacy_store_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.a = fneg float %a
  %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b)
  %fneg = fneg float %mul
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %fneg.a, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_mul_legacy_multi_use_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_mul_legacy_f32_e32 [[NEG_MUL_LEGACY:v[0-9]+]], [[A]], [[B]]
; GCN-DAG: v_mul_legacy_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL_LEGACY]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
define amdgpu_kernel void @v_fneg_mul_legacy_multi_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float %c) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.a = fneg float %a
  %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b)
  %fneg = fneg float %mul
  %use1 = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %c)
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %use1, float addrspace(1)* %out
  ret void
}

; --------------------------------------------------------------------------------
; sin tests
; --------------------------------------------------------------------------------
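; sin is odd, so -sin(x) = sin(-x). For llvm.sin the negation is expected to
; fold into the 1/(2*pi) scale constant of the expansion (0xbe22f983 is
; -1/(2*pi)); for llvm.amdgcn.sin it folds directly into the v_sin_f32 source
; modifier.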

; GCN-LABEL: {{^}}v_fneg_sin_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], 0xbe22f983, [[A]]
; GCN: v_fract_f32_e32 [[FRACT:v[0-9]+]], [[MUL]]
; GCN: v_sin_f32_e32 [[RESULT:v[0-9]+]], [[FRACT]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_sin_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %sin = call float @llvm.sin.f32(float %a)
  %fneg = fneg float %sin
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_amdgcn_sin_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_sin_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_amdgcn_sin_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %sin = call float @llvm.amdgcn.sin.f32(float %a)
  %fneg = fsub float -0.0, %sin
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; --------------------------------------------------------------------------------
; ftrunc tests
; --------------------------------------------------------------------------------
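; trunc is odd (trunc(-x) = -trunc(x)), so the fneg should fold into the
; v_trunc_f32 source modifier.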

; GCN-LABEL: {{^}}v_fneg_trunc_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_trunc_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_trunc_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %trunc = call float @llvm.trunc.f32(float %a)
  %fneg = fsub float -0.0, %trunc
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; --------------------------------------------------------------------------------
; fround tests
; --------------------------------------------------------------------------------
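; round is expanded inline (trunc, compute the rounding increment with
; v_sub/v_cndmask, then add); the fneg applies to the final add: GCN-SAFE
; negates the sum with v_xor_b32, while GCN-NSZ folds it into a v_sub.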

; GCN-LABEL: {{^}}v_fneg_round_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_trunc_f32_e32
; GCN: v_sub_f32_e32
; GCN: v_cndmask_b32

; GCN-SAFE: v_add_f32_e32 [[ADD:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}
; GCN-SAFE: v_xor_b32_e32 [[RESULT:v[0-9]+]], 0x80000000, [[ADD]]

; GCN-NSZ: v_sub_f32_e64 [[RESULT:v[0-9]+]], -v{{[0-9]+}}, v{{[0-9]+}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_round_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %round = call float @llvm.round.f32(float %a)
  %fneg = fsub float -0.0, %round
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; --------------------------------------------------------------------------------
; rint tests
; --------------------------------------------------------------------------------
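; Rounding to nearest-even is symmetric about zero, so fneg(rint(x)) =
; rint(-x) and the negation folds into the v_rndne_f32 source modifier.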

; GCN-LABEL: {{^}}v_fneg_rint_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_rndne_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_rint_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %rint = call float @llvm.rint.f32(float %a)
  %fneg = fsub float -0.0, %rint
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; --------------------------------------------------------------------------------
; nearbyint tests
; --------------------------------------------------------------------------------
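; nearbyint lowers to the same v_rndne_f32 as rint here, so the same
; source-modifier fold is expected.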

; GCN-LABEL: {{^}}v_fneg_nearbyint_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_rndne_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_nearbyint_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %nearbyint = call float @llvm.nearbyint.f32(float %a)
  %fneg = fsub float -0.0, %nearbyint
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; --------------------------------------------------------------------------------
; fcanonicalize tests
; --------------------------------------------------------------------------------
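; canonicalize is lowered as a multiply by 1.0, so negating the result is
; expected to just flip the constant to -1.0.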

; GCN-LABEL: {{^}}v_fneg_canonicalize_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_mul_f32_e32 [[RESULT:v[0-9]+]], -1.0, [[A]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @v_fneg_canonicalize_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %trunc = call float @llvm.canonicalize.f32(float %a)
  %fneg = fsub float -0.0, %trunc
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; --------------------------------------------------------------------------------
; vintrp tests
; --------------------------------------------------------------------------------
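; v_interp_* takes no source modifiers, so the fneg cannot fold into the use
; and is instead pushed into the feeding multiply as a negated operand.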

; GCN-LABEL: {{^}}v_fneg_interp_p1_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], [[A]], -[[B]]
; GCN: v_interp_p1_f32{{(_e32)?}} v{{[0-9]+}}, [[MUL]]
; GCN: v_interp_p1_f32{{(_e32)?}} v{{[0-9]+}}, [[MUL]]
define amdgpu_kernel void @v_fneg_interp_p1_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %mul = fmul float %a, %b
  %fneg = fsub float -0.0, %mul
  %intrp0 = call float @llvm.amdgcn.interp.p1(float %fneg, i32 0, i32 0, i32 0)
  %intrp1 = call float @llvm.amdgcn.interp.p1(float %fneg, i32 1, i32 0, i32 0)
  store volatile float %intrp0, float addrspace(1)* %out.gep
  store volatile float %intrp1, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_interp_p2_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], [[A]], -[[B]]
; GCN: v_interp_p2_f32{{(_e32)?}} v{{[0-9]+}}, [[MUL]]
; GCN: v_interp_p2_f32{{(_e32)?}} v{{[0-9]+}}, [[MUL]]
define amdgpu_kernel void @v_fneg_interp_p2_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %mul = fmul float %a, %b
  %fneg = fsub float -0.0, %mul
  %intrp0 = call float @llvm.amdgcn.interp.p2(float 4.0, float %fneg, i32 0, i32 0, i32 0)
  %intrp1 = call float @llvm.amdgcn.interp.p2(float 4.0, float %fneg, i32 1, i32 0, i32 0)
  store volatile float %intrp0, float addrspace(1)* %out.gep
  store volatile float %intrp1, float addrspace(1)* %out.gep
  ret void
}

; --------------------------------------------------------------------------------
; CopyToReg tests
; --------------------------------------------------------------------------------
2207
2208; GCN-LABEL: {{^}}v_fneg_copytoreg_f32:
2209; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2210; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
2211; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
2212; GCN: v_mul_f32_e32 [[MUL0:v[0-9]+]], [[A]], [[B]]
2213; GCN: s_cbranch_scc0
2214
2215; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL0]]
2216; GCN: s_endpgm
2217
2218; GCN: v_xor_b32_e32 [[XOR:v[0-9]+]], 0x80000000, [[MUL0]]
2219; GCN: v_mul_f32_e32 [[MUL1:v[0-9]+]], [[XOR]], [[C]]
2220; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
2221
2222define amdgpu_kernel void @v_fneg_copytoreg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, i32 %d) #0 {
2223  %tid = call i32 @llvm.amdgcn.workitem.id.x()
2224  %tid.ext = sext i32 %tid to i64
2225  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2226  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
2227  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
2228  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2229  %a = load volatile float, float addrspace(1)* %a.gep
2230  %b = load volatile float, float addrspace(1)* %b.gep
2231  %c = load volatile float, float addrspace(1)* %c.gep
2232  %mul = fmul float %a, %b
2233  %fneg = fsub float -0.0, %mul
2234  %cmp0 = icmp eq i32 %d, 0
2235  br i1 %cmp0, label %if, label %endif
2236
2237if:
2238  %mul1 = fmul float %fneg, %c
2239  store volatile float %mul1, float addrspace(1)* %out.gep
2240  br label %endif
2241
2242endif:
2243  store volatile float %mul, float addrspace(1)* %out.gep
2244  ret void
2245}
2246
2247; --------------------------------------------------------------------------------
2248; inlineasm tests
2249; --------------------------------------------------------------------------------
2250
2251; Can't fold into use, so should fold into source
2252; GCN-LABEL: {{^}}v_fneg_inlineasm_f32:
2253; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2254; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
2255; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], [[A]], -[[B]]
2256; GCN: ; use [[MUL]]
2257; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
2258define amdgpu_kernel void @v_fneg_inlineasm_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, i32 %d) #0 {
2259  %tid = call i32 @llvm.amdgcn.workitem.id.x()
2260  %tid.ext = sext i32 %tid to i64
2261  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2262  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
2263  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
2264  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2265  %a = load volatile float, float addrspace(1)* %a.gep
2266  %b = load volatile float, float addrspace(1)* %b.gep
2267  %c = load volatile float, float addrspace(1)* %c.gep
2268  %mul = fmul float %a, %b
2269  %fneg = fsub float -0.0, %mul
2270  call void asm sideeffect "; use $0", "v"(float %fneg) #0
2271  store volatile float %fneg, float addrspace(1)* %out.gep
2272  ret void
2273}
2274
2275; --------------------------------------------------------------------------------
2276; inlineasm tests
2277; --------------------------------------------------------------------------------
2278
2279; Can't fold into use, so should fold into source
2280; GCN-LABEL: {{^}}v_fneg_inlineasm_multi_use_src_f32:
2281; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2282; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
2283; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], [[A]], [[B]]
2284; GCN: v_xor_b32_e32 [[NEG:v[0-9]+]], 0x80000000, [[MUL]]
2285; GCN: ; use [[NEG]]
2286; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
2287define amdgpu_kernel void @v_fneg_inlineasm_multi_use_src_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, i32 %d) #0 {
2288  %tid = call i32 @llvm.amdgcn.workitem.id.x()
2289  %tid.ext = sext i32 %tid to i64
2290  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2291  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
2292  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
2293  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2294  %a = load volatile float, float addrspace(1)* %a.gep
2295  %b = load volatile float, float addrspace(1)* %b.gep
2296  %c = load volatile float, float addrspace(1)* %c.gep
2297  %mul = fmul float %a, %b
2298  %fneg = fsub float -0.0, %mul
2299  call void asm sideeffect "; use $0", "v"(float %fneg) #0
2300  store volatile float %mul, float addrspace(1)* %out.gep
2301  ret void
2302}
2303
2304; --------------------------------------------------------------------------------
2305; code size regression tests
2306; --------------------------------------------------------------------------------
2307
2308; There are multiple users of the fneg that must use a VOP3
2309; instruction, so there is no penalty
2310; GCN-LABEL: {{^}}multiuse_fneg_2_vop3_users_f32:
2311; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2312; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
2313; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
2314
2315; GCN: v_fma_f32 [[FMA0:v[0-9]+]], -[[A]], [[B]], [[C]]
2316; GCN-NEXT: v_fma_f32 [[FMA1:v[0-9]+]], -[[A]], [[C]], 2.0
2317
2318; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA0]]
2319; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA1]]
2320define amdgpu_kernel void @multiuse_fneg_2_vop3_users_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
2321  %tid = call i32 @llvm.amdgcn.workitem.id.x()
2322  %tid.ext = sext i32 %tid to i64
2323  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2324  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
2325  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
2326  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2327  %a = load volatile float, float addrspace(1)* %a.gep
2328  %b = load volatile float, float addrspace(1)* %b.gep
2329  %c = load volatile float, float addrspace(1)* %c.gep
2330
2331  %fneg.a = fsub float -0.0, %a
2332  %fma0 = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
2333  %fma1 = call float @llvm.fma.f32(float %fneg.a, float %c, float 2.0)
2334
2335  store volatile float %fma0, float addrspace(1)* %out
2336  store volatile float %fma1, float addrspace(1)* %out
2337  ret void
2338}
2339
2340; There are multiple users, but both require using a larger encoding
2341; for the modifier.
2342
2343; GCN-LABEL: {{^}}multiuse_fneg_2_vop2_users_f32:
2344; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2345; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
2346; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
2347
2348; GCN: v_mul_f32_e64 [[MUL0:v[0-9]+]], -[[A]], [[B]]
2349; GCN: v_mul_f32_e64 [[MUL1:v[0-9]+]], -[[A]], [[C]]
2350; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL0]]
2351; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
2352define amdgpu_kernel void @multiuse_fneg_2_vop2_users_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
2353  %tid = call i32 @llvm.amdgcn.workitem.id.x()
2354  %tid.ext = sext i32 %tid to i64
2355  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2356  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
2357  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
2358  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2359  %a = load volatile float, float addrspace(1)* %a.gep
2360  %b = load volatile float, float addrspace(1)* %b.gep
2361  %c = load volatile float, float addrspace(1)* %c.gep
2362
2363  %fneg.a = fsub float -0.0, %a
2364  %mul0 = fmul float %fneg.a, %b
2365  %mul1 = fmul float %fneg.a, %c
2366
2367  store volatile float %mul0, float addrspace(1)* %out
2368  store volatile float %mul1, float addrspace(1)* %out
2369  ret void
2370}
2371
2372; One user is VOP3 so has no cost to folding the modifier, the other does.
2373; GCN-LABEL: {{^}}multiuse_fneg_vop2_vop3_users_f32:
2374; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2375; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
2376; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
2377
2378; GCN: v_fma_f32 [[FMA0:v[0-9]+]], -[[A]], [[B]], 2.0
2379; GCN: v_mul_f32_e64 [[MUL1:v[0-9]+]], -[[A]], [[C]]
2380
2381; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA0]]
2382; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
2383define amdgpu_kernel void @multiuse_fneg_vop2_vop3_users_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
2384  %tid = call i32 @llvm.amdgcn.workitem.id.x()
2385  %tid.ext = sext i32 %tid to i64
2386  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2387  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
2388  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
2389  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2390  %a = load volatile float, float addrspace(1)* %a.gep
2391  %b = load volatile float, float addrspace(1)* %b.gep
2392  %c = load volatile float, float addrspace(1)* %c.gep
2393
2394  %fneg.a = fsub float -0.0, %a
2395  %fma0 = call float @llvm.fma.f32(float %fneg.a, float %b, float 2.0)
2396  %mul1 = fmul float %fneg.a, %c
2397
2398  store volatile float %fma0, float addrspace(1)* %out
2399  store volatile float %mul1, float addrspace(1)* %out
2400  ret void
2401}
2402
2403; The use of the fneg requires a code size increase, but folding into
2404; the source does not
2405
2406; GCN-LABEL: {{^}}free_fold_src_code_size_cost_use_f32:
2407; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2408; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
2409; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
2410; GCN: {{buffer|flat}}_load_dword [[D:v[0-9]+]]
2411
2412; GCN-SAFE: v_fma_f32 [[FMA0:v[0-9]+]], [[A]], [[B]], 2.0
2413; GCN-SAFE-DAG: v_mul_f32_e64 [[MUL1:v[0-9]+]], -[[FMA0]], [[C]]
2414; GCN-SAFE-DAG: v_mul_f32_e64 [[MUL2:v[0-9]+]], -[[FMA0]], [[D]]
2415
2416; GCN-NSZ: v_fma_f32 [[FMA0:v[0-9]+]], [[A]], -[[B]], -2.0
2417; GCN-NSZ-DAG: v_mul_f32_e32 [[MUL1:v[0-9]+]], [[FMA0]], [[C]]
2418; GCN-NSZ-DAG: v_mul_f32_e32 [[MUL2:v[0-9]+]], [[FMA0]], [[D]]
2419
2420; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
2421; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL2]]
2422define amdgpu_kernel void @free_fold_src_code_size_cost_use_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, float addrspace(1)* %d.ptr) #0 {
2423  %tid = call i32 @llvm.amdgcn.workitem.id.x()
2424  %tid.ext = sext i32 %tid to i64
2425  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2426  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
2427  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
2428  %d.gep = getelementptr inbounds float, float addrspace(1)* %d.ptr, i64 %tid.ext
2429  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2430  %a = load volatile float, float addrspace(1)* %a.gep
2431  %b = load volatile float, float addrspace(1)* %b.gep
2432  %c = load volatile float, float addrspace(1)* %c.gep
2433  %d = load volatile float, float addrspace(1)* %d.gep
2434
2435  %fma0 = call float @llvm.fma.f32(float %a, float %b, float 2.0)
2436  %fneg.fma0 = fsub float -0.0, %fma0
2437  %mul1 = fmul float %fneg.fma0, %c
2438  %mul2 = fmul float %fneg.fma0, %d
2439
2440  store volatile float %mul1, float addrspace(1)* %out
2441  store volatile float %mul2, float addrspace(1)* %out
2442  ret void
2443}
2444
2445; GCN-LABEL: {{^}}free_fold_src_code_size_cost_use_f64:
2446; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
2447; GCN: {{buffer|flat}}_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]]
2448; GCN: {{buffer|flat}}_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]]
2449; GCN: {{buffer|flat}}_load_dwordx2 [[D:v\[[0-9]+:[0-9]+\]]]
2450
2451; GCN: v_fma_f64 [[FMA0:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], 2.0
2452; GCN-DAG: v_mul_f64 [[MUL0:v\[[0-9]+:[0-9]+\]]], -[[FMA0]], [[C]]
2453; GCN-DAG: v_mul_f64 [[MUL1:v\[[0-9]+:[0-9]+\]]], -[[FMA0]], [[D]]
2454
2455; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[MUL0]]
2456; GCN-NEXT: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
2457define amdgpu_kernel void @free_fold_src_code_size_cost_use_f64(double addrspace(1)* %out, double addrspace(1)* %a.ptr, double addrspace(1)* %b.ptr, double addrspace(1)* %c.ptr, double addrspace(1)* %d.ptr) #0 {
2458  %tid = call i32 @llvm.amdgcn.workitem.id.x()
2459  %tid.ext = sext i32 %tid to i64
2460  %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
2461  %b.gep = getelementptr inbounds double, double addrspace(1)* %b.ptr, i64 %tid.ext
2462  %c.gep = getelementptr inbounds double, double addrspace(1)* %c.ptr, i64 %tid.ext
2463  %d.gep = getelementptr inbounds double, double addrspace(1)* %d.ptr, i64 %tid.ext
2464  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
2465  %a = load volatile double, double addrspace(1)* %a.gep
2466  %b = load volatile double, double addrspace(1)* %b.gep
2467  %c = load volatile double, double addrspace(1)* %c.gep
2468  %d = load volatile double, double addrspace(1)* %d.gep
2469
2470  %fma0 = call double @llvm.fma.f64(double %a, double %b, double 2.0)
2471  %fneg.fma0 = fsub double -0.0, %fma0
2472  %mul1 = fmul double %fneg.fma0, %c
2473  %mul2 = fmul double %fneg.fma0, %d
2474
2475  store volatile double %mul1, double addrspace(1)* %out
2476  store volatile double %mul2, double addrspace(1)* %out
2477  ret void
2478}
2479
2480; %trunc.a has one fneg use, but it requires a code size increase and
2481; %the fneg can instead be folded for free into the fma.
2482
2483; GCN-LABEL: {{^}}one_use_cost_to_fold_into_src_f32:
2484; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2485; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
2486; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
2487; GCN: v_trunc_f32_e32 [[TRUNC_A:v[0-9]+]], [[A]]
2488; GCN: v_fma_f32 [[FMA0:v[0-9]+]], -[[TRUNC_A]], [[B]], [[C]]
2489; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA0]]
2490define amdgpu_kernel void @one_use_cost_to_fold_into_src_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, float addrspace(1)* %d.ptr) #0 {
2491  %tid = call i32 @llvm.amdgcn.workitem.id.x()
2492  %tid.ext = sext i32 %tid to i64
2493  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2494  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
2495  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
2496  %d.gep = getelementptr inbounds float, float addrspace(1)* %d.ptr, i64 %tid.ext
2497  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2498  %a = load volatile float, float addrspace(1)* %a.gep
2499  %b = load volatile float, float addrspace(1)* %b.gep
2500  %c = load volatile float, float addrspace(1)* %c.gep
2501  %d = load volatile float, float addrspace(1)* %d.gep
2502
2503  %trunc.a = call float @llvm.trunc.f32(float %a)
2504  %trunc.fneg.a = fsub float -0.0, %trunc.a
2505  %fma0 = call float @llvm.fma.f32(float %trunc.fneg.a, float %b, float %c)
2506  store volatile float %fma0, float addrspace(1)* %out
2507  ret void
2508}
2509
2510; GCN-LABEL: {{^}}multi_use_cost_to_fold_into_src:
2511; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
2512; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
2513; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
2514; GCN: {{buffer|flat}}_load_dword [[D:v[0-9]+]]
2515; GCN: v_trunc_f32_e32 [[TRUNC_A:v[0-9]+]], [[A]]
2516; GCN-DAG: v_fma_f32 [[FMA0:v[0-9]+]], -[[TRUNC_A]], [[B]], [[C]]
2517; GCN-DAG: v_mul_f32_e32 [[MUL1:v[0-9]+]], [[TRUNC_A]], [[D]]
2518; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA0]]
2519; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
2520define amdgpu_kernel void @multi_use_cost_to_fold_into_src(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, float addrspace(1)* %d.ptr) #0 {
2521  %tid = call i32 @llvm.amdgcn.workitem.id.x()
2522  %tid.ext = sext i32 %tid to i64
2523  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
2524  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
2525  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
2526  %d.gep = getelementptr inbounds float, float addrspace(1)* %d.ptr, i64 %tid.ext
2527  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
2528  %a = load volatile float, float addrspace(1)* %a.gep
2529  %b = load volatile float, float addrspace(1)* %b.gep
2530  %c = load volatile float, float addrspace(1)* %c.gep
2531  %d = load volatile float, float addrspace(1)* %d.gep
2532
2533  %trunc.a = call float @llvm.trunc.f32(float %a)
2534  %trunc.fneg.a = fsub float -0.0, %trunc.a
2535  %fma0 = call float @llvm.fma.f32(float %trunc.fneg.a, float %b, float %c)
2536  %mul1 = fmul float %trunc.a, %d
2537  store volatile float %fma0, float addrspace(1)* %out
2538  store volatile float %mul1, float addrspace(1)* %out
2539  ret void
2540}
2541
2542declare i32 @llvm.amdgcn.workitem.id.x() #1
2543declare float @llvm.fma.f32(float, float, float) #1
2544declare float @llvm.fmuladd.f32(float, float, float) #1
2545declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>) #1
2546declare float @llvm.sin.f32(float) #1
2547declare float @llvm.trunc.f32(float) #1
2548declare float @llvm.round.f32(float) #1
2549declare float @llvm.rint.f32(float) #1
2550declare float @llvm.nearbyint.f32(float) #1
2551declare float @llvm.canonicalize.f32(float) #1
2552declare float @llvm.minnum.f32(float, float) #1
2553declare float @llvm.maxnum.f32(float, float) #1
2554declare half @llvm.minnum.f16(half, half) #1
2555declare double @llvm.minnum.f64(double, double) #1
2556declare double @llvm.fma.f64(double, double, double) #1
2557
2558declare float @llvm.amdgcn.sin.f32(float) #1
2559declare float @llvm.amdgcn.rcp.f32(float) #1
2560declare float @llvm.amdgcn.rcp.legacy(float) #1
2561declare float @llvm.amdgcn.fmul.legacy(float, float) #1
2562declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #0
2563declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #0
2564
2565attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
2566attributes #1 = { nounwind readnone }
2567attributes #2 = { nounwind "unsafe-fp-math"="true" }
2568