; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9-SAFE %s
; RUN: llc -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -mtriple=amdgcn-- -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9-NNAN %s

; RUN: llc -mtriple=amdgcn-- -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=VI-SAFE %s
; RUN: llc -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -mtriple=amdgcn-- -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI-NNAN %s

; RUN: llc -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI-SAFE %s
; RUN: llc -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=SI-NNAN %s

; Every function below has the same shape: an unordered-or-greater-than
; compare of %a against %b feeding a select that yields %a when the compare
; is true and %b otherwise. The functions differ only in the half-precision
; type they operate on (scalar, then 2, 3, 4 and 8 element vectors).
;
; Each function is compiled six times: three target configurations
; (-mcpu=gfx900, -mcpu=fiji, and no -mcpu at all), each with and without the
; no-NaNs / no-signed-zeros llc flags. The expected output per configuration:
;   * gfx900 and fiji, default flags: a v_cmp_nle_f16 / v_cndmask_b32 pair per
;     element.
;   * gfx900 and fiji, relaxed flags: v_max_f16 (v_pk_max_f16 for the gfx900
;     vector cases, v_max_f16 plus an SDWA variant on fiji).
;   * no -mcpu: operands are round-tripped through v_cvt_f16_f32 and
;     v_cvt_f32_f16, then combined with v_max_legacy_f32 (default flags) or
;     v_max_f32 (relaxed flags).
;
; The assertion blocks are produced by the update script named on the first
; line; regenerate them with that script instead of editing them by hand.

; Scalar half.
define half @test_fmax_legacy_ugt_f16(half %a, half %b) #0 {
; GFX9-SAFE-LABEL: test_fmax_legacy_ugt_f16:
; GFX9-SAFE: ; %bb.0:
; GFX9-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v0, v1
; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX9-SAFE-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-NNAN-LABEL: test_fmax_legacy_ugt_f16:
; GFX9-NNAN: ; %bb.0:
; GFX9-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NNAN-NEXT: v_max_f16_e32 v0, v0, v1
; GFX9-NNAN-NEXT: s_setpc_b64 s[30:31]
;
; VI-SAFE-LABEL: test_fmax_legacy_ugt_f16:
; VI-SAFE: ; %bb.0:
; VI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v0, v1
; VI-SAFE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; VI-SAFE-NEXT: s_setpc_b64 s[30:31]
;
; VI-NNAN-LABEL: test_fmax_legacy_ugt_f16:
; VI-NNAN: ; %bb.0:
; VI-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NNAN-NEXT: v_max_f16_e32 v0, v0, v1
; VI-NNAN-NEXT: s_setpc_b64 s[30:31]
;
; SI-SAFE-LABEL: test_fmax_legacy_ugt_f16:
; SI-SAFE: ; %bb.0:
; SI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v1, v1
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-SAFE-NEXT: v_max_legacy_f32_e32 v0, v1, v0
; SI-SAFE-NEXT: s_setpc_b64 s[30:31]
;
; SI-NNAN-LABEL: test_fmax_legacy_ugt_f16:
; SI-NNAN: ; %bb.0:
; SI-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v1, v1
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-NNAN-NEXT: v_max_f32_e32 v0, v0, v1
; SI-NNAN-NEXT: s_setpc_b64 s[30:31]
  %a.ugt.b = fcmp ugt half %a, %b
  %max = select i1 %a.ugt.b, half %a, half %b
  ret half %max
}

; Two-element half vector.
define <2 x half> @test_fmax_legacy_ugt_v2f16(<2 x half> %a, <2 x half> %b) #0 {
; GFX9-SAFE-LABEL: test_fmax_legacy_ugt_v2f16:
; GFX9-SAFE: ; %bb.0:
; GFX9-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v2, 16, v1
; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v3, 16, v0
; GFX9-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v3, v2
; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
; GFX9-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v0, v1
; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX9-SAFE-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX9-SAFE-NEXT: v_lshl_or_b32 v0, v2, 16, v0
; GFX9-SAFE-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-NNAN-LABEL: test_fmax_legacy_ugt_v2f16:
; GFX9-NNAN: ; %bb.0:
; GFX9-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NNAN-NEXT: v_pk_max_f16 v0, v0, v1
; GFX9-NNAN-NEXT: s_setpc_b64 s[30:31]
;
; VI-SAFE-LABEL: test_fmax_legacy_ugt_v2f16:
; VI-SAFE: ; %bb.0:
; VI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-SAFE-NEXT: v_lshrrev_b32_e32 v2, 16, v1
; VI-SAFE-NEXT: v_lshrrev_b32_e32 v3, 16, v0
; VI-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v3, v2
; VI-SAFE-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
; VI-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v0, v1
; VI-SAFE-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; VI-SAFE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; VI-SAFE-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-SAFE-NEXT: s_setpc_b64 s[30:31]
;
; VI-NNAN-LABEL: test_fmax_legacy_ugt_v2f16:
; VI-NNAN: ; %bb.0:
; VI-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NNAN-NEXT: v_max_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-NNAN-NEXT: v_max_f16_e32 v0, v0, v1
; VI-NNAN-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-NNAN-NEXT: s_setpc_b64 s[30:31]
;
; SI-SAFE-LABEL: test_fmax_legacy_ugt_v2f16:
; SI-SAFE: ; %bb.0:
; SI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v1, v1
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v3, v3
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v2, v2
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v3, v3
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v2
; SI-SAFE-NEXT: v_max_legacy_f32_e32 v0, v2, v0
; SI-SAFE-NEXT: v_max_legacy_f32_e32 v1, v3, v1
; SI-SAFE-NEXT: s_setpc_b64 s[30:31]
;
; SI-NNAN-LABEL: test_fmax_legacy_ugt_v2f16:
; SI-NNAN: ; %bb.0:
; SI-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v3, v3
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v1, v1
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v2, v2
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v3, v3
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v2, v2
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-NNAN-NEXT: v_max_f32_e32 v0, v0, v2
; SI-NNAN-NEXT: v_max_f32_e32 v1, v1, v3
; SI-NNAN-NEXT: s_setpc_b64 s[30:31]
  %a.ugt.b = fcmp ugt <2 x half> %a, %b
  %max = select <2 x i1> %a.ugt.b, <2 x half> %a, <2 x half> %b
  ret <2 x half> %max
}

; Three-element half vector.
define <3 x half> @test_fmax_legacy_ugt_v3f16(<3 x half> %a, <3 x half> %b) #0 {
; GFX9-SAFE-LABEL: test_fmax_legacy_ugt_v3f16:
; GFX9-SAFE: ; %bb.0:
; GFX9-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v4, 16, v2
; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v5, 16, v0
; GFX9-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v5, v4
; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
; GFX9-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v1, v3
; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX9-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v0, v2
; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX9-SAFE-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX9-SAFE-NEXT: v_lshl_or_b32 v0, v4, 16, v0
; GFX9-SAFE-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-NNAN-LABEL: test_fmax_legacy_ugt_v3f16:
; GFX9-NNAN: ; %bb.0:
; GFX9-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NNAN-NEXT: v_pk_max_f16 v1, v1, v3
; GFX9-NNAN-NEXT: v_pk_max_f16 v0, v0, v2
; GFX9-NNAN-NEXT: s_setpc_b64 s[30:31]
;
; VI-SAFE-LABEL: test_fmax_legacy_ugt_v3f16:
; VI-SAFE: ; %bb.0:
; VI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-SAFE-NEXT: v_lshrrev_b32_e32 v4, 16, v2
; VI-SAFE-NEXT: v_lshrrev_b32_e32 v5, 16, v0
; VI-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v5, v4
; VI-SAFE-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
; VI-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v1, v3
; VI-SAFE-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; VI-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v0, v2
; VI-SAFE-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; VI-SAFE-NEXT: v_lshlrev_b32_e32 v2, 16, v4
; VI-SAFE-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-SAFE-NEXT: s_setpc_b64 s[30:31]
;
; VI-NNAN-LABEL: test_fmax_legacy_ugt_v3f16:
; VI-NNAN: ; %bb.0:
; VI-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NNAN-NEXT: v_max_f16_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-NNAN-NEXT: v_max_f16_e32 v0, v0, v2
; VI-NNAN-NEXT: v_max_f16_e32 v1, v1, v3
; VI-NNAN-NEXT: v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-NNAN-NEXT: s_setpc_b64 s[30:31]
;
; SI-SAFE-LABEL: test_fmax_legacy_ugt_v3f16:
; SI-SAFE: ; %bb.0:
; SI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v2, v2
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v5, v5
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v1, v1
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v4, v4
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v3, v3
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v2
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v5, v5
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v4, v4
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v3, v3
; SI-SAFE-NEXT: v_max_legacy_f32_e32 v0, v3, v0
; SI-SAFE-NEXT: v_max_legacy_f32_e32 v1, v4, v1
; SI-SAFE-NEXT: v_max_legacy_f32_e32 v2, v5, v2
; SI-SAFE-NEXT: s_setpc_b64 s[30:31]
;
; SI-NNAN-LABEL: test_fmax_legacy_ugt_v3f16:
; SI-NNAN: ; %bb.0:
; SI-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v5, v5
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v2, v2
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v4, v4
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v1, v1
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v3, v3
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v5, v5
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v2, v2
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v4, v4
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v3, v3
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-NNAN-NEXT: v_max_f32_e32 v0, v0, v3
; SI-NNAN-NEXT: v_max_f32_e32 v1, v1, v4
; SI-NNAN-NEXT: v_max_f32_e32 v2, v2, v5
; SI-NNAN-NEXT: s_setpc_b64 s[30:31]
  %a.ugt.b = fcmp ugt <3 x half> %a, %b
  %max = select <3 x i1> %a.ugt.b, <3 x half> %a, <3 x half> %b
  ret <3 x half> %max
}

; Four-element half vector.
define <4 x half> @test_fmax_legacy_ugt_v4f16(<4 x half> %a, <4 x half> %b) #0 {
; GFX9-SAFE-LABEL: test_fmax_legacy_ugt_v4f16:
; GFX9-SAFE: ; %bb.0:
; GFX9-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v6, 16, v3
; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v7, 16, v1
; GFX9-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v7, v6
; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v4, 16, v2
; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v5, 16, v0
; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v6, v6, v7, vcc
; GFX9-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v5, v4
; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
; GFX9-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v1, v3
; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX9-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v0, v2
; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX9-SAFE-NEXT: v_mov_b32_e32 v2, 0xffff
; GFX9-SAFE-NEXT: v_and_b32_e32 v0, v2, v0
; GFX9-SAFE-NEXT: v_and_b32_e32 v1, v2, v1
; GFX9-SAFE-NEXT: v_lshl_or_b32 v0, v4, 16, v0
; GFX9-SAFE-NEXT: v_lshl_or_b32 v1, v6, 16, v1
; GFX9-SAFE-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-NNAN-LABEL: test_fmax_legacy_ugt_v4f16:
; GFX9-NNAN: ; %bb.0:
; GFX9-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NNAN-NEXT: v_pk_max_f16 v0, v0, v2
; GFX9-NNAN-NEXT: v_pk_max_f16 v1, v1, v3
; GFX9-NNAN-NEXT: s_setpc_b64 s[30:31]
;
; VI-SAFE-LABEL: test_fmax_legacy_ugt_v4f16:
; VI-SAFE: ; %bb.0:
; VI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-SAFE-NEXT: v_lshrrev_b32_e32 v6, 16, v3
; VI-SAFE-NEXT: v_lshrrev_b32_e32 v7, 16, v1
; VI-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v7, v6
; VI-SAFE-NEXT: v_lshrrev_b32_e32 v4, 16, v2
; VI-SAFE-NEXT: v_lshrrev_b32_e32 v5, 16, v0
; VI-SAFE-NEXT: v_cndmask_b32_e32 v6, v6, v7, vcc
; VI-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v5, v4
; VI-SAFE-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
; VI-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v1, v3
; VI-SAFE-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; VI-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v0, v2
; VI-SAFE-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; VI-SAFE-NEXT: v_lshlrev_b32_e32 v2, 16, v4
; VI-SAFE-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-SAFE-NEXT: v_lshlrev_b32_e32 v2, 16, v6
; VI-SAFE-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-SAFE-NEXT: s_setpc_b64 s[30:31]
;
; VI-NNAN-LABEL: test_fmax_legacy_ugt_v4f16:
; VI-NNAN: ; %bb.0:
; VI-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NNAN-NEXT: v_max_f16_sdwa v4, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-NNAN-NEXT: v_max_f16_e32 v1, v1, v3
; VI-NNAN-NEXT: v_max_f16_sdwa v5, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-NNAN-NEXT: v_max_f16_e32 v0, v0, v2
; VI-NNAN-NEXT: v_or_b32_sdwa v0, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-NNAN-NEXT: v_or_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-NNAN-NEXT: s_setpc_b64 s[30:31]
;
; SI-SAFE-LABEL: test_fmax_legacy_ugt_v4f16:
; SI-SAFE: ; %bb.0:
; SI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v3, v3
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v7, v7
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v2, v2
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v6, v6
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v1, v1
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v5, v5
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v4, v4
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v3, v3
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v7, v7
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v2
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v6, v6
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v5, v5
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v4, v4
; SI-SAFE-NEXT: v_max_legacy_f32_e32 v0, v4, v0
; SI-SAFE-NEXT: v_max_legacy_f32_e32 v1, v5, v1
; SI-SAFE-NEXT: v_max_legacy_f32_e32 v2, v6, v2
; SI-SAFE-NEXT: v_max_legacy_f32_e32 v3, v7, v3
; SI-SAFE-NEXT: s_setpc_b64 s[30:31]
;
; SI-NNAN-LABEL: test_fmax_legacy_ugt_v4f16:
; SI-NNAN: ; %bb.0:
; SI-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v7, v7
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v3, v3
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v6, v6
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v2, v2
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v5, v5
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v1, v1
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v4, v4
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v7, v7
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v3, v3
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v6, v6
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v2, v2
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v5, v5
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v4, v4
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-NNAN-NEXT: v_max_f32_e32 v0, v0, v4
; SI-NNAN-NEXT: v_max_f32_e32 v1, v1, v5
; SI-NNAN-NEXT: v_max_f32_e32 v2, v2, v6
; SI-NNAN-NEXT: v_max_f32_e32 v3, v3, v7
; SI-NNAN-NEXT: s_setpc_b64 s[30:31]
  %a.ugt.b = fcmp ugt <4 x half> %a, %b
  %max = select <4 x i1> %a.ugt.b, <4 x half> %a, <4 x half> %b
  ret <4 x half> %max
}

; Eight-element half vector.
define <8 x half> @test_fmax_legacy_ugt_v8f16(<8 x half> %a, <8 x half> %b) #0 {
; GFX9-SAFE-LABEL: test_fmax_legacy_ugt_v8f16:
; GFX9-SAFE: ; %bb.0:
; GFX9-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v14, 16, v7
; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v15, 16, v3
; GFX9-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v15, v14
; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v12, 16, v6
; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v13, 16, v2
; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v14, v14, v15, vcc
; GFX9-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v13, v12
; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v10, 16, v5
; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v11, 16, v1
; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v12, v12, v13, vcc
; GFX9-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v11, v10
; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v8, 16, v4
; GFX9-SAFE-NEXT: v_lshrrev_b32_e32 v9, 16, v0
; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v10, v10, v11, vcc
; GFX9-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v9, v8
; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v8, v8, v9, vcc
; GFX9-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v3, v7
; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
; GFX9-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v2, v6
; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
; GFX9-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v1, v5
; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; GFX9-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v0, v4
; GFX9-SAFE-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
; GFX9-SAFE-NEXT: v_mov_b32_e32 v4, 0xffff
; GFX9-SAFE-NEXT: v_and_b32_e32 v0, v4, v0
; GFX9-SAFE-NEXT: v_and_b32_e32 v1, v4, v1
; GFX9-SAFE-NEXT: v_and_b32_e32 v2, v4, v2
; GFX9-SAFE-NEXT: v_and_b32_e32 v3, v4, v3
; GFX9-SAFE-NEXT: v_lshl_or_b32 v0, v8, 16, v0
; GFX9-SAFE-NEXT: v_lshl_or_b32 v1, v10, 16, v1
; GFX9-SAFE-NEXT: v_lshl_or_b32 v2, v12, 16, v2
; GFX9-SAFE-NEXT: v_lshl_or_b32 v3, v14, 16, v3
; GFX9-SAFE-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-NNAN-LABEL: test_fmax_legacy_ugt_v8f16:
; GFX9-NNAN: ; %bb.0:
; GFX9-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NNAN-NEXT: v_pk_max_f16 v0, v0, v4
; GFX9-NNAN-NEXT: v_pk_max_f16 v1, v1, v5
; GFX9-NNAN-NEXT: v_pk_max_f16 v2, v2, v6
; GFX9-NNAN-NEXT: v_pk_max_f16 v3, v3, v7
; GFX9-NNAN-NEXT: s_setpc_b64 s[30:31]
;
; VI-SAFE-LABEL: test_fmax_legacy_ugt_v8f16:
; VI-SAFE: ; %bb.0:
; VI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-SAFE-NEXT: v_lshrrev_b32_e32 v14, 16, v7
; VI-SAFE-NEXT: v_lshrrev_b32_e32 v15, 16, v3
; VI-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v15, v14
; VI-SAFE-NEXT: v_lshrrev_b32_e32 v12, 16, v6
; VI-SAFE-NEXT: v_lshrrev_b32_e32 v13, 16, v2
; VI-SAFE-NEXT: v_cndmask_b32_e32 v14, v14, v15, vcc
; VI-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v13, v12
; VI-SAFE-NEXT: v_lshrrev_b32_e32 v10, 16, v5
; VI-SAFE-NEXT: v_lshrrev_b32_e32 v11, 16, v1
; VI-SAFE-NEXT: v_cndmask_b32_e32 v12, v12, v13, vcc
; VI-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v11, v10
; VI-SAFE-NEXT: v_lshrrev_b32_e32 v8, 16, v4
; VI-SAFE-NEXT: v_lshrrev_b32_e32 v9, 16, v0
; VI-SAFE-NEXT: v_cndmask_b32_e32 v10, v10, v11, vcc
; VI-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v9, v8
; VI-SAFE-NEXT: v_cndmask_b32_e32 v8, v8, v9, vcc
; VI-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v3, v7
; VI-SAFE-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
; VI-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v2, v6
; VI-SAFE-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
; VI-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v1, v5
; VI-SAFE-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; VI-SAFE-NEXT: v_cmp_nle_f16_e32 vcc, v0, v4
; VI-SAFE-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
; VI-SAFE-NEXT: v_lshlrev_b32_e32 v4, 16, v8
; VI-SAFE-NEXT: v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-SAFE-NEXT: v_lshlrev_b32_e32 v4, 16, v10
; VI-SAFE-NEXT: v_or_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-SAFE-NEXT: v_lshlrev_b32_e32 v4, 16, v12
; VI-SAFE-NEXT: v_or_b32_sdwa v2, v2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-SAFE-NEXT: v_lshlrev_b32_e32 v4, 16, v14
; VI-SAFE-NEXT: v_or_b32_sdwa v3, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-SAFE-NEXT: s_setpc_b64 s[30:31]
;
; VI-NNAN-LABEL: test_fmax_legacy_ugt_v8f16:
; VI-NNAN: ; %bb.0:
; VI-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NNAN-NEXT: v_max_f16_sdwa v8, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-NNAN-NEXT: v_max_f16_e32 v3, v3, v7
; VI-NNAN-NEXT: v_max_f16_sdwa v9, v2, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-NNAN-NEXT: v_max_f16_e32 v2, v2, v6
; VI-NNAN-NEXT: v_max_f16_sdwa v10, v1, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-NNAN-NEXT: v_max_f16_e32 v1, v1, v5
; VI-NNAN-NEXT: v_max_f16_sdwa v11, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-NNAN-NEXT: v_max_f16_e32 v0, v0, v4
; VI-NNAN-NEXT: v_or_b32_sdwa v0, v0, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-NNAN-NEXT: v_or_b32_sdwa v1, v1, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-NNAN-NEXT: v_or_b32_sdwa v2, v2, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-NNAN-NEXT: v_or_b32_sdwa v3, v3, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-NNAN-NEXT: s_setpc_b64 s[30:31]
;
; SI-SAFE-LABEL: test_fmax_legacy_ugt_v8f16:
; SI-SAFE: ; %bb.0:
; SI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v7, v7
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v15, v15
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v6, v6
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v14, v14
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v5, v5
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v13, v13
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v4, v4
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v12, v12
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v3, v3
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v11, v11
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v2, v2
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v10, v10
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v1, v1
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v9, v9
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v8, v8
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v7, v7
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v15, v15
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v6, v6
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v14, v14
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v5, v5
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v13, v13
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v4, v4
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v12, v12
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v3, v3
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v11, v11
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v2
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v10, v10
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v9, v9
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v8, v8
; SI-SAFE-NEXT: v_max_legacy_f32_e32 v0, v8, v0
; SI-SAFE-NEXT: v_max_legacy_f32_e32 v1, v9, v1
; SI-SAFE-NEXT: v_max_legacy_f32_e32 v2, v10, v2
; SI-SAFE-NEXT: v_max_legacy_f32_e32 v3, v11, v3
; SI-SAFE-NEXT: v_max_legacy_f32_e32 v4, v12, v4
; SI-SAFE-NEXT: v_max_legacy_f32_e32 v5, v13, v5
; SI-SAFE-NEXT: v_max_legacy_f32_e32 v6, v14, v6
; SI-SAFE-NEXT: v_max_legacy_f32_e32 v7, v15, v7
; SI-SAFE-NEXT: s_setpc_b64 s[30:31]
;
; SI-NNAN-LABEL: test_fmax_legacy_ugt_v8f16:
; SI-NNAN: ; %bb.0:
; SI-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v15, v15
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v7, v7
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v14, v14
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v6, v6
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v13, v13
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v5, v5
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v12, v12
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v4, v4
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v11, v11
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v3, v3
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v10, v10
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v2, v2
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v9, v9
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v1, v1
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v8, v8
; SI-NNAN-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v15, v15
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v7, v7
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v14, v14
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v6, v6
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v13, v13
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v5, v5
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v12, v12
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v4, v4
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v11, v11
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v3, v3
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v10, v10
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v2, v2
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v9, v9
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v8, v8
; SI-NNAN-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-NNAN-NEXT: v_max_f32_e32 v0, v0, v8
; SI-NNAN-NEXT: v_max_f32_e32 v1, v1, v9
; SI-NNAN-NEXT: v_max_f32_e32 v2, v2, v10
; SI-NNAN-NEXT: v_max_f32_e32 v3, v3, v11
; SI-NNAN-NEXT: v_max_f32_e32 v4, v4, v12
; SI-NNAN-NEXT: v_max_f32_e32 v5, v5, v13
; SI-NNAN-NEXT: v_max_f32_e32 v6, v6, v14
; SI-NNAN-NEXT: v_max_f32_e32 v7, v7, v15
; SI-NNAN-NEXT: s_setpc_b64 s[30:31]
  %a.ugt.b = fcmp ugt <8 x half> %a, %b
  %max = select <8 x i1> %a.ugt.b, <8 x half> %a, <8 x half> %b
  ret <8 x half> %max
}

attributes #0 = { nounwind }