; Codegen tests for the llvm.maxnum.* intrinsics on the amdgcn target.
;
; NOTE: both run lines below enable only the SI and FUNC check prefixes, so
; the EG-prefixed directives that appear throughout this file are not
; exercised by any run line visible here.
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s

; f32 scalar and vector overloads used by the tests below.
declare float @llvm.maxnum.f32(float, float) #0
declare <2 x float> @llvm.maxnum.v2f32(<2 x float>, <2 x float>) #0
declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>) #0
declare <8 x float> @llvm.maxnum.v8f32(<8 x float>, <8 x float>) #0
declare <16 x float> @llvm.maxnum.v16f32(<16 x float>, <16 x float>) #0

; f64 overload; not referenced by any function visible in this file.
declare double @llvm.maxnum.f64(double, double)

; Scalar case: maxnum of two f32 function arguments, result stored to %out.
; The SI directives expect a single v_max_f32_e32.
; FUNC-LABEL: @test_fmax_f32
; SI: v_max_f32_e32

; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG: MAX_DX10 {{.*}}[[OUT]]
define void @test_fmax_f32(float addrspace(1)* %out, float %a, float %b) nounwind {
  %val = call float @llvm.maxnum.f32(float %a, float %b) #0
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; <2 x float> case: the SI directives look for two v_max_f32_e32
; instructions, matching the two vector elements.
; FUNC-LABEL: @test_fmax_v2f32
; SI: v_max_f32_e32
; SI: v_max_f32_e32

; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+]]
; EG: MAX_DX10 {{.*}}[[OUT]]
; EG: MAX_DX10 {{.*}}[[OUT]]
define void @test_fmax_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) nounwind {
  %val = call <2 x float> @llvm.maxnum.v2f32(<2 x float> %a, <2 x float> %b) #0
  store <2 x float> %val, <2 x float> addrspace(1)* %out, align 8
  ret void
}

; <4 x float> case: the SI directives look for four v_max_f32_e32
; instructions, matching the four vector elements.
; FUNC-LABEL: @test_fmax_v4f32
; SI: v_max_f32_e32
; SI: v_max_f32_e32
; SI: v_max_f32_e32
; SI: v_max_f32_e32

; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+]]
; EG: MAX_DX10 {{.*}}[[OUT]]
; EG: MAX_DX10 {{.*}}[[OUT]]
; EG: MAX_DX10 {{.*}}[[OUT]]
; EG: MAX_DX10 {{.*}}[[OUT]]
define void @test_fmax_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) nounwind {
  %val = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %a, <4 x float> %b) #0
  store <4 x float> %val, <4 x float> addrspace(1)* %out, align 16
  ret void
}

; <8 x float> case: the SI directives look for eight v_max_f32_e32
; instructions. The EG directives capture two store registers and expect a
; MAX_DX10 writing each of their X/Y/Z/W channels, in any order.
; FUNC-LABEL: @test_fmax_v8f32
; SI: v_max_f32_e32
; SI: v_max_f32_e32
; SI: v_max_f32_e32
; SI: v_max_f32_e32
; SI: v_max_f32_e32
; SI: v_max_f32_e32
; SI: v_max_f32_e32
; SI: v_max_f32_e32

; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT1:T[0-9]+]]
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT2:T[0-9]+]]
; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].X
; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].Y
; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].Z
; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].W
; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].X
; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].Y
; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].Z
; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].W
define void @test_fmax_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b) nounwind {
  %val = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %a, <8 x float> %b) #0
  store <8 x float> %val, <8 x float> addrspace(1)* %out, align 32
  ret void
}

79; FUNC-LABEL: @test_fmax_v16f32
80; SI: v_max_f32_e32
81; SI: v_max_f32_e32
82; SI: v_max_f32_e32
83; SI: v_max_f32_e32
84; SI: v_max_f32_e32
85; SI: v_max_f32_e32
86; SI: v_max_f32_e32
87; SI: v_max_f32_e32
88; SI: v_max_f32_e32
89; SI: v_max_f32_e32
90; SI: v_max_f32_e32
91; SI: v_max_f32_e32
92; SI: v_max_f32_e32
93; SI: v_max_f32_e32
94; SI: v_max_f32_e32
95; SI: v_max_f32_e32
96
97; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT1:T[0-9]+]]
98; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT2:T[0-9]+]]
99; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT3:T[0-9]+]]
100; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT4:T[0-9]+]]
101; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].X
102; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].Y
103; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].Z
104; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].W
105; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].X
106; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].Y
107; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].Z
108; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].W
109; EG-DAG: MAX_DX10 {{.*}}[[OUT3]].X
110; EG-DAG: MAX_DX10 {{.*}}[[OUT3]].Y
111; EG-DAG: MAX_DX10 {{.*}}[[OUT3]].Z
112; EG-DAG: MAX_DX10 {{.*}}[[OUT3]].W
113; EG-DAG: MAX_DX10 {{.*}}[[OUT4]].X
114; EG-DAG: MAX_DX10 {{.*}}[[OUT4]].Y
115; EG-DAG: MAX_DX10 {{.*}}[[OUT4]].Z
116; EG-DAG: MAX_DX10 {{.*}}[[OUT4]].W
117define void @test_fmax_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a, <16 x float> %b) nounwind {
118  %val = call <16 x float> @llvm.maxnum.v16f32(<16 x float> %a, <16 x float> %b) #0
119  store <16 x float> %val, <16 x float> addrspace(1)* %out, align 64
120  ret void
121}
122
; Both operands are constants (1.0 and 2.0). The SI directives require that
; no v_max_f32_e32 appears before the result 2.0 is moved into a register
; and stored.
; FUNC-LABEL: @constant_fold_fmax_f32
; SI-NOT: v_max_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 2.0
; SI: buffer_store_dword [[REG]]

; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG-NOT: MAX_DX10
; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
define void @constant_fold_fmax_f32(float addrspace(1)* %out) nounwind {
  %val = call float @llvm.maxnum.f32(float 1.0, float 2.0) #0
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; Both operands are the quiet NaN constant 0x7FF8000000000000 (LLVM spells
; float hex constants in double format). The expected folded result is the
; f32 NaN pattern 0x7fc00000, which is 2143289344 in decimal.
; FUNC-LABEL: @constant_fold_fmax_f32_nan_nan
; SI-NOT: v_max_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000
; SI: buffer_store_dword [[REG]]

; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG-NOT: MAX_DX10
; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
; EG: 2143289344(nan)
define void @constant_fold_fmax_f32_nan_nan(float addrspace(1)* %out) nounwind {
  %val = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float 0x7FF8000000000000) #0
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; Constant operands 1.0 and NaN (NaN second). The directives expect the
; call to fold to the non-NaN operand, 1.0, with no max instruction emitted.
; FUNC-LABEL: @constant_fold_fmax_f32_val_nan
; SI-NOT: v_max_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
; SI: buffer_store_dword [[REG]]

; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG-NOT: MAX_DX10
; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
define void @constant_fold_fmax_f32_val_nan(float addrspace(1)* %out) nounwind {
  %val = call float @llvm.maxnum.f32(float 1.0, float 0x7FF8000000000000) #0
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; Constant operands NaN and 1.0 (NaN first). The directives expect the
; call to fold to the non-NaN operand, 1.0, with no max instruction emitted.
; FUNC-LABEL: @constant_fold_fmax_f32_nan_val
; SI-NOT: v_max_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
; SI: buffer_store_dword [[REG]]

; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG-NOT: MAX_DX10
; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
define void @constant_fold_fmax_f32_nan_val(float addrspace(1)* %out) nounwind {
  %val = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float 1.0) #0
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; Constant operands +0.0 and +0.0. The SI directives expect the folded
; result to be materialized as the immediate 0, with no max instruction.
; FUNC-LABEL: @constant_fold_fmax_f32_p0_p0
; SI-NOT: v_max_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0
; SI: buffer_store_dword [[REG]]

; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG-NOT: MAX_DX10
; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
define void @constant_fold_fmax_f32_p0_p0(float addrspace(1)* %out) nounwind {
  %val = call float @llvm.maxnum.f32(float 0.0, float 0.0) #0
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; Constant operands +0.0 and -0.0 (positive zero first). The SI directives
; expect the folded result to be materialized as the immediate 0, i.e. the
; +0.0 bit pattern, with no max instruction.
; FUNC-LABEL: @constant_fold_fmax_f32_p0_n0
; SI-NOT: v_max_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0
; SI: buffer_store_dword [[REG]]

; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG-NOT: MAX_DX10
; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
define void @constant_fold_fmax_f32_p0_n0(float addrspace(1)* %out) nounwind {
  %val = call float @llvm.maxnum.f32(float 0.0, float -0.0) #0
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; Constant operands -0.0 and +0.0 (negative zero first). The SI directives
; expect the folded result to be produced with v_bfrev_b32 of 1; reversing
; the bits of 1 gives 0x80000000, the -0.0 bit pattern.
; FUNC-LABEL: @constant_fold_fmax_f32_n0_p0
; SI-NOT: v_max_f32_e32
; SI: v_bfrev_b32_e32 [[REG:v[0-9]+]], 1{{$}}
; SI: buffer_store_dword [[REG]]

; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG-NOT: MAX_DX10
; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
define void @constant_fold_fmax_f32_n0_p0(float addrspace(1)* %out) nounwind {
  %val = call float @llvm.maxnum.f32(float -0.0, float 0.0) #0
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; Constant operands -0.0 and -0.0. The SI directives expect the folded
; result to be produced with v_bfrev_b32 of 1; reversing the bits of 1
; gives 0x80000000, the -0.0 bit pattern.
; FUNC-LABEL: @constant_fold_fmax_f32_n0_n0
; SI-NOT: v_max_f32_e32
; SI: v_bfrev_b32_e32 [[REG:v[0-9]+]], 1{{$}}
; SI: buffer_store_dword [[REG]]

; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG-NOT: MAX_DX10
; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
define void @constant_fold_fmax_f32_n0_n0(float addrspace(1)* %out) nounwind {
  %val = call float @llvm.maxnum.f32(float -0.0, float -0.0) #0
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; Variable first operand, constant 2.0 second. The SI directive expects 2.0
; to appear directly as an operand of v_max_f32_e64.
;
; Because one operand is a variable the max cannot be folded away, so the
; EG directives expect a MAX_DX10, using the same pattern as the mirrored
; test fmax_immediate_var_f32. (They previously forbade MAX_DX10 and
; expected a MOV of a literal, which only fits the constant-fold tests.)
; FUNC-LABEL: @fmax_var_immediate_f32
; SI: v_max_f32_e64 {{v[0-9]+}}, 2.0, {{s[0-9]+}}

; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG: MAX_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
define void @fmax_var_immediate_f32(float addrspace(1)* %out, float %a) nounwind {
  %val = call float @llvm.maxnum.f32(float %a, float 2.0) #0
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; Constant 2.0 first operand, variable second. The SI directive is the same
; as for fmax_var_immediate_f32: 2.0 appears directly as an operand of
; v_max_f32_e64.
; FUNC-LABEL: @fmax_immediate_var_f32
; SI: v_max_f32_e64 {{v[0-9]+}}, 2.0, {{s[0-9]+}}

; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG: MAX_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
define void @fmax_immediate_var_f32(float addrspace(1)* %out, float %a) nounwind {
  %val = call float @llvm.maxnum.f32(float 2.0, float %a) #0
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; Variable first operand, constant 99.0 second. The SI directives expect
; 99.0 (bit pattern 0x42c60000) to be moved into a VGPR first and that
; register to be used as an operand of v_max_f32_e32.
; FUNC-LABEL: @fmax_var_literal_f32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x42c60000
; SI: v_max_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]

; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG: MAX_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
define void @fmax_var_literal_f32(float addrspace(1)* %out, float %a) nounwind {
  %val = call float @llvm.maxnum.f32(float %a, float 99.0) #0
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; Constant 99.0 first operand, variable second. The SI directives are the
; same as for fmax_var_literal_f32: 99.0 (bit pattern 0x42c60000) is moved
; into a VGPR and that register is used as an operand of v_max_f32_e32.
; FUNC-LABEL: @fmax_literal_var_f32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x42c60000
; SI: v_max_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]

; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG: MAX_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
define void @fmax_literal_var_f32(float addrspace(1)* %out, float %a) nounwind {
  %val = call float @llvm.maxnum.f32(float 99.0, float %a) #0
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; Attribute group #0: attached to the f32 maxnum intrinsic declarations and
; to every maxnum call site in this file.
attributes #0 = { nounwind readnone }
