; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s

; GlobalISel code generation checks for `mul <2 x i16>` on two AMDGPU
; subtargets (-mcpu=gfx900 and -mcpu=fiji). Each function below is compiled
; by both llc invocations above and its assembly is matched against the
; check group with the corresponding prefix.
;
; The check lines are generated output; after changing the IR, regenerate
; them with utils/update_llc_test_checks.py instead of editing them by hand.

; Plain packed multiply of two <2 x i16> arguments.
; The gfx900 checks expect a single v_pk_mul_lo_u16; the fiji checks expect a
; 16-bit multiply for the low word, an SDWA multiply writing WORD_1 for the
; high word, and a v_or_b32 that combines the two halves.
define <2 x i16> @v_mul_v2i16(<2 x i16> %a, <2 x i16> %b) {
; GFX9-LABEL: v_mul_v2i16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_mul_lo_u16 v0, v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_mul_v2i16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_mul_lo_u16_e32 v2, v0, v1
; GFX8-NEXT:    v_mul_lo_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_or_b32_e32 v0, v2, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
  %mul = mul <2 x i16> %a, %b
  ret <2 x i16> %mul
}

; The left operand is a <2 x half> that is fneg'ed and then bitcast to
; <2 x i16> before the integer multiply.
; The fiji checks materialize the negation as an xor with 0x80008000 (the
; sign bit of each half); the gfx900 checks instead show neg_lo/neg_hi set
; for the first source operand of v_pk_mul_lo_u16.
; NOTE(review): the gfx900 output folds a floating-point negate into the
; source modifiers of an integer packed multiply. Confirm against the ISA
; that this yields the same bits as the explicit sign-bit xor before relying
; on these checks as the intended behaviour.
define <2 x i16> @v_mul_v2i16_fneg_lhs(<2 x half> %a, <2 x i16> %b) {
; GFX9-LABEL: v_mul_v2i16_fneg_lhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_mul_lo_u16 v0, v0, v1 neg_lo:[1,0] neg_hi:[1,0]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_mul_v2i16_fneg_lhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
; GFX8-NEXT:    v_mul_lo_u16_e32 v2, v0, v1
; GFX8-NEXT:    v_mul_lo_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_or_b32_e32 v0, v2, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
  %neg.a = fneg <2 x half> %a
  %cast.neg.a = bitcast <2 x half> %neg.a to <2 x i16>
  %mul = mul <2 x i16> %cast.neg.a, %b
  ret <2 x i16> %mul
}

; Mirror of the previous case with the fneg + bitcast applied to the right
; operand: the fiji checks xor v1 with 0x80008000, and the gfx900 checks show
; neg_lo/neg_hi set for the second source operand.
; NOTE(review): same question as for the lhs variant about folding fneg into
; an integer packed multiply - confirm this is intended.
define <2 x i16> @v_mul_v2i16_fneg_rhs(<2 x i16> %a, <2 x half> %b) {
; GFX9-LABEL: v_mul_v2i16_fneg_rhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_mul_lo_u16 v0, v0, v1 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_mul_v2i16_fneg_rhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_xor_b32_e32 v1, 0x80008000, v1
; GFX8-NEXT:    v_mul_lo_u16_e32 v2, v0, v1
; GFX8-NEXT:    v_mul_lo_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_or_b32_e32 v0, v2, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
  %neg.b = fneg <2 x half> %b
  %cast.neg.b = bitcast <2 x half> %neg.b to <2 x i16>
  %mul = mul <2 x i16> %a, %cast.neg.b
  ret <2 x i16> %mul
}

; Both operands are <2 x half> values that are fneg'ed and bitcast to
; <2 x i16> before the multiply.
; The fiji checks load 0x80008000 into s4 once and xor both inputs with it;
; the gfx900 checks show neg_lo/neg_hi set for both source operands.
; NOTE(review): same question as for the single-operand variants about
; folding fneg into an integer packed multiply - confirm this is intended.
define <2 x i16> @v_mul_v2i16_fneg_lhs_fneg_rhs(<2 x half> %a, <2 x half> %b) {
; GFX9-LABEL: v_mul_v2i16_fneg_lhs_fneg_rhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_mul_lo_u16 v0, v0, v1 neg_lo:[1,1] neg_hi:[1,1]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_mul_v2i16_fneg_lhs_fneg_rhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    s_mov_b32 s4, 0x80008000
; GFX8-NEXT:    v_xor_b32_e32 v0, s4, v0
; GFX8-NEXT:    v_xor_b32_e32 v1, s4, v1
; GFX8-NEXT:    v_mul_lo_u16_e32 v2, v0, v1
; GFX8-NEXT:    v_mul_lo_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_or_b32_e32 v0, v2, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
  %neg.a = fneg <2 x half> %a
  %neg.b = fneg <2 x half> %b
  %cast.neg.a = bitcast <2 x half> %neg.a to <2 x i16>
  %cast.neg.b = bitcast <2 x half> %neg.b to <2 x i16>
  %mul = mul <2 x i16> %cast.neg.a, %cast.neg.b
  ret <2 x i16> %mul
}