• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9-SAFE %s
3; RUN: llc -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -mtriple=amdgcn-- -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9-NNAN %s
4
5; RUN: llc -mtriple=amdgcn-- -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=VI-SAFE %s
6; RUN: llc -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -mtriple=amdgcn-- -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI-NNAN %s
7
8; RUN: llc -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI-SAFE %s
9; RUN: llc -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=SI-NNAN %s
10
; Scalar half case: returns %a when `fcmp ugt %a, %b` (unordered or greater
; than) holds, otherwise %b. The default-FP-mode runs expect a compare plus
; v_cndmask on the gfx900/fiji runs and v_max_legacy_f32 on the run without
; -mcpu; the runs with -enable-no-nans-fp-math expect a plain max instruction.
11define half @test_fmax_legacy_ugt_f16(half %a, half %b) #0 {
12; GFX9-SAFE-LABEL: test_fmax_legacy_ugt_f16:
13; GFX9-SAFE:       ; %bb.0:
14; GFX9-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v0, v1
16; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
17; GFX9-SAFE-NEXT:    s_setpc_b64 s[30:31]
18;
19; GFX9-NNAN-LABEL: test_fmax_legacy_ugt_f16:
20; GFX9-NNAN:       ; %bb.0:
21; GFX9-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22; GFX9-NNAN-NEXT:    v_max_f16_e32 v0, v0, v1
23; GFX9-NNAN-NEXT:    s_setpc_b64 s[30:31]
24;
25; VI-SAFE-LABEL: test_fmax_legacy_ugt_f16:
26; VI-SAFE:       ; %bb.0:
27; VI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
28; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v0, v1
29; VI-SAFE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
30; VI-SAFE-NEXT:    s_setpc_b64 s[30:31]
31;
32; VI-NNAN-LABEL: test_fmax_legacy_ugt_f16:
33; VI-NNAN:       ; %bb.0:
34; VI-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
35; VI-NNAN-NEXT:    v_max_f16_e32 v0, v0, v1
36; VI-NNAN-NEXT:    s_setpc_b64 s[30:31]
37;
38; SI-SAFE-LABEL: test_fmax_legacy_ugt_f16:
39; SI-SAFE:       ; %bb.0:
40; SI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
41; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v0, v0
42; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v1, v1
43; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v0, v0
44; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v1, v1
45; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v0, v1, v0
46; SI-SAFE-NEXT:    s_setpc_b64 s[30:31]
47;
48; SI-NNAN-LABEL: test_fmax_legacy_ugt_f16:
49; SI-NNAN:       ; %bb.0:
50; SI-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
51; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v1, v1
52; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v0, v0
53; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v1, v1
54; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v0, v0
55; SI-NNAN-NEXT:    v_max_f32_e32 v0, v0, v1
56; SI-NNAN-NEXT:    s_setpc_b64 s[30:31]
; IR under test: an unordered greater-than compare feeding a select of its own operands.
57  %cmp = fcmp ugt half %a, %b
58  %val = select i1 %cmp, half %a, half %b
59  ret half %val
60}
61
; <2 x half> case: element-wise select on `fcmp ugt`. In the default-FP-mode
; runs the gfx900/fiji checks compare the high and low halves separately and
; repack the result; with -enable-no-nans-fp-math the gfx900 checks expect a
; single v_pk_max_f16. The run without -mcpu checks one f32 max per element.
62define <2 x half> @test_fmax_legacy_ugt_v2f16(<2 x half> %a, <2 x half> %b) #0 {
63; GFX9-SAFE-LABEL: test_fmax_legacy_ugt_v2f16:
64; GFX9-SAFE:       ; %bb.0:
65; GFX9-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
66; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
67; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
68; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v3, v2
69; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
70; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v0, v1
71; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
72; GFX9-SAFE-NEXT:    v_and_b32_e32 v0, 0xffff, v0
73; GFX9-SAFE-NEXT:    v_lshl_or_b32 v0, v2, 16, v0
74; GFX9-SAFE-NEXT:    s_setpc_b64 s[30:31]
75;
76; GFX9-NNAN-LABEL: test_fmax_legacy_ugt_v2f16:
77; GFX9-NNAN:       ; %bb.0:
78; GFX9-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
79; GFX9-NNAN-NEXT:    v_pk_max_f16 v0, v0, v1
80; GFX9-NNAN-NEXT:    s_setpc_b64 s[30:31]
81;
82; VI-SAFE-LABEL: test_fmax_legacy_ugt_v2f16:
83; VI-SAFE:       ; %bb.0:
84; VI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
85; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
86; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
87; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v3, v2
88; VI-SAFE-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
89; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v0, v1
90; VI-SAFE-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
91; VI-SAFE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
92; VI-SAFE-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
93; VI-SAFE-NEXT:    s_setpc_b64 s[30:31]
94;
95; VI-NNAN-LABEL: test_fmax_legacy_ugt_v2f16:
96; VI-NNAN:       ; %bb.0:
97; VI-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
98; VI-NNAN-NEXT:    v_max_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
99; VI-NNAN-NEXT:    v_max_f16_e32 v0, v0, v1
100; VI-NNAN-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
101; VI-NNAN-NEXT:    s_setpc_b64 s[30:31]
102;
103; SI-SAFE-LABEL: test_fmax_legacy_ugt_v2f16:
104; SI-SAFE:       ; %bb.0:
105; SI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
106; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v1, v1
107; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v3, v3
108; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v0, v0
109; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v2, v2
110; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v1, v1
111; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v3, v3
112; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v0, v0
113; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v2, v2
114; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v0, v2, v0
115; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v1, v3, v1
116; SI-SAFE-NEXT:    s_setpc_b64 s[30:31]
117;
118; SI-NNAN-LABEL: test_fmax_legacy_ugt_v2f16:
119; SI-NNAN:       ; %bb.0:
120; SI-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
121; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v3, v3
122; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v1, v1
123; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v2, v2
124; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v0, v0
125; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v3, v3
126; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v1, v1
127; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v2, v2
128; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v0, v0
129; SI-NNAN-NEXT:    v_max_f32_e32 v0, v0, v2
130; SI-NNAN-NEXT:    v_max_f32_e32 v1, v1, v3
131; SI-NNAN-NEXT:    s_setpc_b64 s[30:31]
; IR under test: vector compare producing <2 x i1>, then a lane-wise select between %a and %b.
132  %cmp = fcmp ugt <2 x half> %a, %b
133  %val = select <2 x i1> %cmp, <2 x half> %a, <2 x half> %b
134  ret <2 x half> %val
135}
136
; <3 x half> case: the same compare+select pattern with an odd element count.
; The gfx900/fiji checks use two registers per operand (v0/v1 and v2/v3), while
; the checks for the run without -mcpu use one register per element (v0-v5).
; With -enable-no-nans-fp-math the gfx900 checks expect two v_pk_max_f16.
137define <3 x half> @test_fmax_legacy_ugt_v3f16(<3 x half> %a, <3 x half> %b) #0 {
138; GFX9-SAFE-LABEL: test_fmax_legacy_ugt_v3f16:
139; GFX9-SAFE:       ; %bb.0:
140; GFX9-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
141; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
142; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
143; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v5, v4
144; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
145; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v1, v3
146; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
147; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v0, v2
148; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
149; GFX9-SAFE-NEXT:    v_and_b32_e32 v0, 0xffff, v0
150; GFX9-SAFE-NEXT:    v_lshl_or_b32 v0, v4, 16, v0
151; GFX9-SAFE-NEXT:    s_setpc_b64 s[30:31]
152;
153; GFX9-NNAN-LABEL: test_fmax_legacy_ugt_v3f16:
154; GFX9-NNAN:       ; %bb.0:
155; GFX9-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
156; GFX9-NNAN-NEXT:    v_pk_max_f16 v1, v1, v3
157; GFX9-NNAN-NEXT:    v_pk_max_f16 v0, v0, v2
158; GFX9-NNAN-NEXT:    s_setpc_b64 s[30:31]
159;
160; VI-SAFE-LABEL: test_fmax_legacy_ugt_v3f16:
161; VI-SAFE:       ; %bb.0:
162; VI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
163; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
164; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
165; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v5, v4
166; VI-SAFE-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
167; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v1, v3
168; VI-SAFE-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
169; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v0, v2
170; VI-SAFE-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
171; VI-SAFE-NEXT:    v_lshlrev_b32_e32 v2, 16, v4
172; VI-SAFE-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
173; VI-SAFE-NEXT:    s_setpc_b64 s[30:31]
174;
175; VI-NNAN-LABEL: test_fmax_legacy_ugt_v3f16:
176; VI-NNAN:       ; %bb.0:
177; VI-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
178; VI-NNAN-NEXT:    v_max_f16_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
179; VI-NNAN-NEXT:    v_max_f16_e32 v0, v0, v2
180; VI-NNAN-NEXT:    v_max_f16_e32 v1, v1, v3
181; VI-NNAN-NEXT:    v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
182; VI-NNAN-NEXT:    s_setpc_b64 s[30:31]
183;
184; SI-SAFE-LABEL: test_fmax_legacy_ugt_v3f16:
185; SI-SAFE:       ; %bb.0:
186; SI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
187; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v2, v2
188; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v5, v5
189; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v1, v1
190; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v4, v4
191; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v0, v0
192; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v3, v3
193; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v2, v2
194; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v5, v5
195; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v1, v1
196; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v4, v4
197; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v0, v0
198; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v3, v3
199; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v0, v3, v0
200; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v1, v4, v1
201; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v2, v5, v2
202; SI-SAFE-NEXT:    s_setpc_b64 s[30:31]
203;
204; SI-NNAN-LABEL: test_fmax_legacy_ugt_v3f16:
205; SI-NNAN:       ; %bb.0:
206; SI-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
207; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v5, v5
208; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v2, v2
209; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v4, v4
210; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v1, v1
211; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v3, v3
212; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v0, v0
213; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v5, v5
214; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v2, v2
215; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v4, v4
216; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v1, v1
217; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v3, v3
218; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v0, v0
219; SI-NNAN-NEXT:    v_max_f32_e32 v0, v0, v3
220; SI-NNAN-NEXT:    v_max_f32_e32 v1, v1, v4
221; SI-NNAN-NEXT:    v_max_f32_e32 v2, v2, v5
222; SI-NNAN-NEXT:    s_setpc_b64 s[30:31]
; IR under test: vector compare producing <3 x i1>, then a lane-wise select between %a and %b.
223  %cmp = fcmp ugt <3 x half> %a, %b
224  %val = select <3 x i1> %cmp, <3 x half> %a, <3 x half> %b
225  ret <3 x half> %val
226}
227
; <4 x half> case: the same compare+select pattern over four elements. The
; default-FP-mode gfx900/fiji checks expect four compare+v_cndmask pairs
; followed by repacking; with -enable-no-nans-fp-math the gfx900 checks expect
; two v_pk_max_f16. The run without -mcpu checks four f32 max instructions.
228define <4 x half> @test_fmax_legacy_ugt_v4f16(<4 x half> %a, <4 x half> %b) #0 {
229; GFX9-SAFE-LABEL: test_fmax_legacy_ugt_v4f16:
230; GFX9-SAFE:       ; %bb.0:
231; GFX9-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
232; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
233; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v7, 16, v1
234; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v7, v6
235; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
236; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
237; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v6, v6, v7, vcc
238; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v5, v4
239; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
240; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v1, v3
241; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
242; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v0, v2
243; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
244; GFX9-SAFE-NEXT:    v_mov_b32_e32 v2, 0xffff
245; GFX9-SAFE-NEXT:    v_and_b32_e32 v0, v2, v0
246; GFX9-SAFE-NEXT:    v_and_b32_e32 v1, v2, v1
247; GFX9-SAFE-NEXT:    v_lshl_or_b32 v0, v4, 16, v0
248; GFX9-SAFE-NEXT:    v_lshl_or_b32 v1, v6, 16, v1
249; GFX9-SAFE-NEXT:    s_setpc_b64 s[30:31]
250;
251; GFX9-NNAN-LABEL: test_fmax_legacy_ugt_v4f16:
252; GFX9-NNAN:       ; %bb.0:
253; GFX9-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
254; GFX9-NNAN-NEXT:    v_pk_max_f16 v0, v0, v2
255; GFX9-NNAN-NEXT:    v_pk_max_f16 v1, v1, v3
256; GFX9-NNAN-NEXT:    s_setpc_b64 s[30:31]
257;
258; VI-SAFE-LABEL: test_fmax_legacy_ugt_v4f16:
259; VI-SAFE:       ; %bb.0:
260; VI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
261; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
262; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v7, 16, v1
263; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v7, v6
264; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
265; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
266; VI-SAFE-NEXT:    v_cndmask_b32_e32 v6, v6, v7, vcc
267; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v5, v4
268; VI-SAFE-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
269; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v1, v3
270; VI-SAFE-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
271; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v0, v2
272; VI-SAFE-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
273; VI-SAFE-NEXT:    v_lshlrev_b32_e32 v2, 16, v4
274; VI-SAFE-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
275; VI-SAFE-NEXT:    v_lshlrev_b32_e32 v2, 16, v6
276; VI-SAFE-NEXT:    v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
277; VI-SAFE-NEXT:    s_setpc_b64 s[30:31]
278;
279; VI-NNAN-LABEL: test_fmax_legacy_ugt_v4f16:
280; VI-NNAN:       ; %bb.0:
281; VI-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
282; VI-NNAN-NEXT:    v_max_f16_sdwa v4, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
283; VI-NNAN-NEXT:    v_max_f16_e32 v1, v1, v3
284; VI-NNAN-NEXT:    v_max_f16_sdwa v5, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
285; VI-NNAN-NEXT:    v_max_f16_e32 v0, v0, v2
286; VI-NNAN-NEXT:    v_or_b32_sdwa v0, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
287; VI-NNAN-NEXT:    v_or_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
288; VI-NNAN-NEXT:    s_setpc_b64 s[30:31]
289;
290; SI-SAFE-LABEL: test_fmax_legacy_ugt_v4f16:
291; SI-SAFE:       ; %bb.0:
292; SI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
293; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v3, v3
294; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v7, v7
295; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v2, v2
296; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v6, v6
297; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v1, v1
298; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v5, v5
299; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v0, v0
300; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v4, v4
301; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v3, v3
302; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v7, v7
303; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v2, v2
304; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v6, v6
305; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v1, v1
306; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v5, v5
307; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v0, v0
308; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v4, v4
309; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v0, v4, v0
310; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v1, v5, v1
311; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v2, v6, v2
312; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v3, v7, v3
313; SI-SAFE-NEXT:    s_setpc_b64 s[30:31]
314;
315; SI-NNAN-LABEL: test_fmax_legacy_ugt_v4f16:
316; SI-NNAN:       ; %bb.0:
317; SI-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
318; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v7, v7
319; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v3, v3
320; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v6, v6
321; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v2, v2
322; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v5, v5
323; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v1, v1
324; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v4, v4
325; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v0, v0
326; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v7, v7
327; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v3, v3
328; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v6, v6
329; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v2, v2
330; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v5, v5
331; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v1, v1
332; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v4, v4
333; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v0, v0
334; SI-NNAN-NEXT:    v_max_f32_e32 v0, v0, v4
335; SI-NNAN-NEXT:    v_max_f32_e32 v1, v1, v5
336; SI-NNAN-NEXT:    v_max_f32_e32 v2, v2, v6
337; SI-NNAN-NEXT:    v_max_f32_e32 v3, v3, v7
338; SI-NNAN-NEXT:    s_setpc_b64 s[30:31]
; IR under test: vector compare producing <4 x i1>, then a lane-wise select between %a and %b.
339  %cmp = fcmp ugt <4 x half> %a, %b
340  %val = select <4 x i1> %cmp, <4 x half> %a, <4 x half> %b
341  ret <4 x half> %val
342}
343
; <8 x half> case: the same compare+select pattern over eight elements. The
; default-FP-mode gfx900/fiji checks expect eight compare+v_cndmask pairs
; followed by repacking; with -enable-no-nans-fp-math the gfx900 checks expect
; four v_pk_max_f16. The run without -mcpu checks eight f32 max instructions.
344define <8 x half> @test_fmax_legacy_ugt_v8f16(<8 x half> %a, <8 x half> %b) #0 {
345; GFX9-SAFE-LABEL: test_fmax_legacy_ugt_v8f16:
346; GFX9-SAFE:       ; %bb.0:
347; GFX9-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
348; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v14, 16, v7
349; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v15, 16, v3
350; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v15, v14
351; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v12, 16, v6
352; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v13, 16, v2
353; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v14, v14, v15, vcc
354; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v13, v12
355; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v10, 16, v5
356; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v11, 16, v1
357; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v12, v12, v13, vcc
358; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v11, v10
359; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v8, 16, v4
360; GFX9-SAFE-NEXT:    v_lshrrev_b32_e32 v9, 16, v0
361; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v10, v10, v11, vcc
362; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v9, v8
363; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v8, v8, v9, vcc
364; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v3, v7
365; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v3, v7, v3, vcc
366; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v2, v6
367; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v2, v6, v2, vcc
368; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v1, v5
369; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
370; GFX9-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v0, v4
371; GFX9-SAFE-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
372; GFX9-SAFE-NEXT:    v_mov_b32_e32 v4, 0xffff
373; GFX9-SAFE-NEXT:    v_and_b32_e32 v0, v4, v0
374; GFX9-SAFE-NEXT:    v_and_b32_e32 v1, v4, v1
375; GFX9-SAFE-NEXT:    v_and_b32_e32 v2, v4, v2
376; GFX9-SAFE-NEXT:    v_and_b32_e32 v3, v4, v3
377; GFX9-SAFE-NEXT:    v_lshl_or_b32 v0, v8, 16, v0
378; GFX9-SAFE-NEXT:    v_lshl_or_b32 v1, v10, 16, v1
379; GFX9-SAFE-NEXT:    v_lshl_or_b32 v2, v12, 16, v2
380; GFX9-SAFE-NEXT:    v_lshl_or_b32 v3, v14, 16, v3
381; GFX9-SAFE-NEXT:    s_setpc_b64 s[30:31]
382;
383; GFX9-NNAN-LABEL: test_fmax_legacy_ugt_v8f16:
384; GFX9-NNAN:       ; %bb.0:
385; GFX9-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
386; GFX9-NNAN-NEXT:    v_pk_max_f16 v0, v0, v4
387; GFX9-NNAN-NEXT:    v_pk_max_f16 v1, v1, v5
388; GFX9-NNAN-NEXT:    v_pk_max_f16 v2, v2, v6
389; GFX9-NNAN-NEXT:    v_pk_max_f16 v3, v3, v7
390; GFX9-NNAN-NEXT:    s_setpc_b64 s[30:31]
391;
392; VI-SAFE-LABEL: test_fmax_legacy_ugt_v8f16:
393; VI-SAFE:       ; %bb.0:
394; VI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
395; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v14, 16, v7
396; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v15, 16, v3
397; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v15, v14
398; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v12, 16, v6
399; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v13, 16, v2
400; VI-SAFE-NEXT:    v_cndmask_b32_e32 v14, v14, v15, vcc
401; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v13, v12
402; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v10, 16, v5
403; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v11, 16, v1
404; VI-SAFE-NEXT:    v_cndmask_b32_e32 v12, v12, v13, vcc
405; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v11, v10
406; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v8, 16, v4
407; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v9, 16, v0
408; VI-SAFE-NEXT:    v_cndmask_b32_e32 v10, v10, v11, vcc
409; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v9, v8
410; VI-SAFE-NEXT:    v_cndmask_b32_e32 v8, v8, v9, vcc
411; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v3, v7
412; VI-SAFE-NEXT:    v_cndmask_b32_e32 v3, v7, v3, vcc
413; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v2, v6
414; VI-SAFE-NEXT:    v_cndmask_b32_e32 v2, v6, v2, vcc
415; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v1, v5
416; VI-SAFE-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
417; VI-SAFE-NEXT:    v_cmp_nle_f16_e32 vcc, v0, v4
418; VI-SAFE-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
419; VI-SAFE-NEXT:    v_lshlrev_b32_e32 v4, 16, v8
420; VI-SAFE-NEXT:    v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
421; VI-SAFE-NEXT:    v_lshlrev_b32_e32 v4, 16, v10
422; VI-SAFE-NEXT:    v_or_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
423; VI-SAFE-NEXT:    v_lshlrev_b32_e32 v4, 16, v12
424; VI-SAFE-NEXT:    v_or_b32_sdwa v2, v2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
425; VI-SAFE-NEXT:    v_lshlrev_b32_e32 v4, 16, v14
426; VI-SAFE-NEXT:    v_or_b32_sdwa v3, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
427; VI-SAFE-NEXT:    s_setpc_b64 s[30:31]
428;
429; VI-NNAN-LABEL: test_fmax_legacy_ugt_v8f16:
430; VI-NNAN:       ; %bb.0:
431; VI-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
432; VI-NNAN-NEXT:    v_max_f16_sdwa v8, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
433; VI-NNAN-NEXT:    v_max_f16_e32 v3, v3, v7
434; VI-NNAN-NEXT:    v_max_f16_sdwa v9, v2, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
435; VI-NNAN-NEXT:    v_max_f16_e32 v2, v2, v6
436; VI-NNAN-NEXT:    v_max_f16_sdwa v10, v1, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
437; VI-NNAN-NEXT:    v_max_f16_e32 v1, v1, v5
438; VI-NNAN-NEXT:    v_max_f16_sdwa v11, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
439; VI-NNAN-NEXT:    v_max_f16_e32 v0, v0, v4
440; VI-NNAN-NEXT:    v_or_b32_sdwa v0, v0, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
441; VI-NNAN-NEXT:    v_or_b32_sdwa v1, v1, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
442; VI-NNAN-NEXT:    v_or_b32_sdwa v2, v2, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
443; VI-NNAN-NEXT:    v_or_b32_sdwa v3, v3, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
444; VI-NNAN-NEXT:    s_setpc_b64 s[30:31]
445;
446; SI-SAFE-LABEL: test_fmax_legacy_ugt_v8f16:
447; SI-SAFE:       ; %bb.0:
448; SI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
449; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v7, v7
450; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v15, v15
451; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v6, v6
452; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v14, v14
453; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v5, v5
454; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v13, v13
455; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v4, v4
456; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v12, v12
457; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v3, v3
458; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v11, v11
459; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v2, v2
460; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v10, v10
461; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v1, v1
462; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v9, v9
463; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v0, v0
464; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v8, v8
465; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v7, v7
466; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v15, v15
467; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v6, v6
468; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v14, v14
469; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v5, v5
470; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v13, v13
471; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v4, v4
472; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v12, v12
473; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v3, v3
474; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v11, v11
475; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v2, v2
476; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v10, v10
477; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v1, v1
478; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v9, v9
479; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v0, v0
480; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v8, v8
481; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v0, v8, v0
482; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v1, v9, v1
483; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v2, v10, v2
484; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v3, v11, v3
485; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v4, v12, v4
486; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v5, v13, v5
487; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v6, v14, v6
488; SI-SAFE-NEXT:    v_max_legacy_f32_e32 v7, v15, v7
489; SI-SAFE-NEXT:    s_setpc_b64 s[30:31]
490;
491; SI-NNAN-LABEL: test_fmax_legacy_ugt_v8f16:
492; SI-NNAN:       ; %bb.0:
493; SI-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
494; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v15, v15
495; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v7, v7
496; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v14, v14
497; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v6, v6
498; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v13, v13
499; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v5, v5
500; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v12, v12
501; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v4, v4
502; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v11, v11
503; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v3, v3
504; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v10, v10
505; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v2, v2
506; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v9, v9
507; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v1, v1
508; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v8, v8
509; SI-NNAN-NEXT:    v_cvt_f16_f32_e32 v0, v0
510; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v15, v15
511; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v7, v7
512; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v14, v14
513; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v6, v6
514; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v13, v13
515; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v5, v5
516; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v12, v12
517; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v4, v4
518; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v11, v11
519; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v3, v3
520; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v10, v10
521; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v2, v2
522; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v9, v9
523; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v1, v1
524; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v8, v8
525; SI-NNAN-NEXT:    v_cvt_f32_f16_e32 v0, v0
526; SI-NNAN-NEXT:    v_max_f32_e32 v0, v0, v8
527; SI-NNAN-NEXT:    v_max_f32_e32 v1, v1, v9
528; SI-NNAN-NEXT:    v_max_f32_e32 v2, v2, v10
529; SI-NNAN-NEXT:    v_max_f32_e32 v3, v3, v11
530; SI-NNAN-NEXT:    v_max_f32_e32 v4, v4, v12
531; SI-NNAN-NEXT:    v_max_f32_e32 v5, v5, v13
532; SI-NNAN-NEXT:    v_max_f32_e32 v6, v6, v14
533; SI-NNAN-NEXT:    v_max_f32_e32 v7, v7, v15
534; SI-NNAN-NEXT:    s_setpc_b64 s[30:31]
; IR under test: vector compare producing <8 x i1>, then a lane-wise select between %a and %b.
535  %cmp = fcmp ugt <8 x half> %a, %b
536  %val = select <8 x i1> %cmp, <8 x half> %a, <8 x half> %b
537  ret <8 x half> %val
538}
539
; Attribute group #0: referenced by every test function defined in this section of the file.
540attributes #0 = { nounwind }
541