; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8 %s
; FIXME: promotion not handled without f16 insts

; Codegen checks for llvm.experimental.constrained.fmul on half-precision
; scalars and vectors (half, <2 x half>, <3 x half>, <4 x half>). Every call
; uses "round.tonearest"; the exception-behavior operand varies between
; "fpexcept.strict", "fpexcept.ignore" and "fpexcept.maytrap". All functions
; carry the strictfp attribute (#0).
;
; The gfx900 run is checked with the GCN and GFX9 prefixes, the fiji run with
; the GCN and GFX8 prefixes. Functions whose expected output is the same for
; both runs use only the shared GCN prefix.

; Scalar half, default calling convention. The three exception-behavior
; variants are expected to produce the same single v_mul_f16_e32.
define half @v_constained_fmul_f16_fpexcept_strict(half %x, half %y) #0 {
; GCN-LABEL: v_constained_fmul_f16_fpexcept_strict:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_mul_f16_e32 v0, v0, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %val = call half @llvm.experimental.constrained.fmul.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret half %val
}

define half @v_constained_fmul_f16_fpexcept_ignore(half %x, half %y) #0 {
; GCN-LABEL: v_constained_fmul_f16_fpexcept_ignore:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_mul_f16_e32 v0, v0, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %val = call half @llvm.experimental.constrained.fmul.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
  ret half %val
}

define half @v_constained_fmul_f16_fpexcept_maytrap(half %x, half %y) #0 {
; GCN-LABEL: v_constained_fmul_f16_fpexcept_maytrap:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_mul_f16_e32 v0, v0, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %val = call half @llvm.experimental.constrained.fmul.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
  ret half %val
}

; <2 x half>. The gfx900 expectations use one packed v_pk_mul_f16; the fiji
; expectations multiply the high halves with an SDWA v_mul_f16, the low halves
; with v_mul_f16_e32, and merge the two results with v_or_b32_sdwa.
define <2 x half> @v_constained_fmul_v2f16_fpexcept_strict(<2 x half> %x, <2 x half> %y) #0 {
; GFX9-LABEL: v_constained_fmul_v2f16_fpexcept_strict:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_mul_f16 v0, v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_constained_fmul_v2f16_fpexcept_strict:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_mul_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_mul_f16_e32 v0, v0, v1
; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT:    s_setpc_b64 s[30:31]
  %val = call <2 x half> @llvm.experimental.constrained.fmul.v2f16(<2 x half> %x, <2 x half> %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret <2 x half> %val
}

define <2 x half> @v_constained_fmul_v2f16_fpexcept_ignore(<2 x half> %x, <2 x half> %y) #0 {
; GFX9-LABEL: v_constained_fmul_v2f16_fpexcept_ignore:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_mul_f16 v0, v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_constained_fmul_v2f16_fpexcept_ignore:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_mul_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_mul_f16_e32 v0, v0, v1
; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT:    s_setpc_b64 s[30:31]
  %val = call <2 x half> @llvm.experimental.constrained.fmul.v2f16(<2 x half> %x, <2 x half> %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
  ret <2 x half> %val
}

define <2 x half> @v_constained_fmul_v2f16_fpexcept_maytrap(<2 x half> %x, <2 x half> %y) #0 {
; GFX9-LABEL: v_constained_fmul_v2f16_fpexcept_maytrap:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_mul_f16 v0, v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_constained_fmul_v2f16_fpexcept_maytrap:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_mul_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_mul_f16_e32 v0, v0, v1
; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT:    s_setpc_b64 s[30:31]
  %val = call <2 x half> @llvm.experimental.constrained.fmul.v2f16(<2 x half> %x, <2 x half> %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
  ret <2 x half> %val
}

; <3 x half>. Operands arrive in v0/v1 and v2/v3. The first register pair is
; multiplied as in the <2 x half> tests above; the second pair is multiplied
; with a single v_mul_f16_e32.
define <3 x half> @v_constained_fmul_v3f16_fpexcept_strict(<3 x half> %x, <3 x half> %y) #0 {
; GFX9-LABEL: v_constained_fmul_v3f16_fpexcept_strict:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_mul_f16 v0, v0, v2
; GFX9-NEXT:    v_mul_f16_e32 v1, v1, v3
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_constained_fmul_v3f16_fpexcept_strict:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_mul_f16_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_mul_f16_e32 v0, v0, v2
; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT:    v_mul_f16_e32 v1, v1, v3
; GFX8-NEXT:    s_setpc_b64 s[30:31]
  %val = call <3 x half> @llvm.experimental.constrained.fmul.v3f16(<3 x half> %x, <3 x half> %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret <3 x half> %val
}

; <4 x half>. The gfx900 expectations below contain no v_pk_mul_f16: each of
; the four lanes gets its own v_mul_f16 and the halves are repacked with
; v_and_b32 / v_lshl_or_b32, which is what the FIXME refers to.
; FIXME: Scalarized
define <4 x half> @v_constained_fmul_v4f16_fpexcept_strict(<4 x half> %x, <4 x half> %y) #0 {
; GFX9-LABEL: v_constained_fmul_v4f16_fpexcept_strict:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mul_f16_sdwa v4, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX9-NEXT:    v_mul_f16_sdwa v5, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX9-NEXT:    v_mul_f16_e32 v0, v0, v2
; GFX9-NEXT:    v_mov_b32_e32 v2, 0xffff
; GFX9-NEXT:    v_mul_f16_e32 v1, v1, v3
; GFX9-NEXT:    v_and_b32_e32 v0, v2, v0
; GFX9-NEXT:    v_and_b32_e32 v1, v2, v1
; GFX9-NEXT:    v_lshl_or_b32 v0, v5, 16, v0
; GFX9-NEXT:    v_lshl_or_b32 v1, v4, 16, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_constained_fmul_v4f16_fpexcept_strict:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_mul_f16_sdwa v4, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_mul_f16_e32 v1, v1, v3
; GFX8-NEXT:    v_mul_f16_sdwa v5, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_mul_f16_e32 v0, v0, v2
; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT:    v_or_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT:    s_setpc_b64 s[30:31]
  %val = call <4 x half> @llvm.experimental.constrained.fmul.v4f16(<4 x half> %x, <4 x half> %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret <4 x half> %val
}

; amdgpu_ps variants with inreg arguments. In the expected output the operands
; are read from s2 and s3, and one of them is first copied into a VGPR with
; v_mov_b32 before the multiply.
define amdgpu_ps half @s_constained_fmul_f16_fpexcept_strict(half inreg %x, half inreg %y) #0 {
; GCN-LABEL: s_constained_fmul_f16_fpexcept_strict:
; GCN:       ; %bb.0:
; GCN-NEXT:    v_mov_b32_e32 v0, s3
; GCN-NEXT:    v_mul_f16_e32 v0, s2, v0
; GCN-NEXT:    ; return to shader part epilog
  %val = call half @llvm.experimental.constrained.fmul.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret half %val
}

define amdgpu_ps <2 x half> @s_constained_fmul_v2f16_fpexcept_strict(<2 x half> inreg %x, <2 x half> inreg %y) #0 {
; GFX9-LABEL: s_constained_fmul_v2f16_fpexcept_strict:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v0, s3
; GFX9-NEXT:    v_pk_mul_f16 v0, s2, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_constained_fmul_v2f16_fpexcept_strict:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_lshr_b32 s0, s3, 16
; GFX8-NEXT:    s_lshr_b32 s1, s2, 16
; GFX8-NEXT:    v_mov_b32_e32 v0, s0
; GFX8-NEXT:    v_mov_b32_e32 v1, s1
; GFX8-NEXT:    v_mul_f16_sdwa v0, v1, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-NEXT:    v_mov_b32_e32 v1, s3
; GFX8-NEXT:    v_mul_f16_e32 v1, s2, v1
; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT:    ; return to shader part epilog
  %val = call <2 x half> @llvm.experimental.constrained.fmul.v2f16(<2 x half> %x, <2 x half> %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret <2 x half> %val
}

; Constrained-fmul intrinsic declarations, one per tested type.
declare half @llvm.experimental.constrained.fmul.f16(half, half, metadata, metadata) #1
declare <2 x half> @llvm.experimental.constrained.fmul.v2f16(<2 x half>, <2 x half>, metadata, metadata) #1
declare <3 x half> @llvm.experimental.constrained.fmul.v3f16(<3 x half>, <3 x half>, metadata, metadata) #1
declare <4 x half> @llvm.experimental.constrained.fmul.v4f16(<4 x half>, <4 x half>, metadata, metadata) #1

; #0 is applied to every test function, #1 to the intrinsic declarations.
attributes #0 = { strictfp }
attributes #1 = { inaccessiblememonly nounwind willreturn }