• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
3; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s
4
; Baseline case: multiply two <2 x i16> arguments and return the product.
; The gfx900 run expects a single packed v_pk_mul_lo_u16. The fiji run expects
; two multiplies instead (a plain v_mul_lo_u16 and an SDWA v_mul_lo_u16 that
; selects WORD_1 for both sources and the destination), merged with v_or_b32.
5define <2 x i16> @v_mul_v2i16(<2 x i16> %a, <2 x i16> %b) {
6; GFX9-LABEL: v_mul_v2i16:
7; GFX9:       ; %bb.0:
8; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9; GFX9-NEXT:    v_pk_mul_lo_u16 v0, v0, v1
10; GFX9-NEXT:    s_setpc_b64 s[30:31]
11;
12; GFX8-LABEL: v_mul_v2i16:
13; GFX8:       ; %bb.0:
14; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15; GFX8-NEXT:    v_mul_lo_u16_e32 v2, v0, v1
16; GFX8-NEXT:    v_mul_lo_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
17; GFX8-NEXT:    v_or_b32_e32 v0, v2, v0
18; GFX8-NEXT:    s_setpc_b64 s[30:31]
19  %mul = mul <2 x i16> %a, %b
20  ret <2 x i16> %mul
21}
22
; Left operand arrives as <2 x half>, is negated with fneg, bitcast to
; <2 x i16>, and then used in an integer multiply with %b.
; The fiji run expects the negate to appear as an explicit v_xor_b32 of v0 with
; 0x80008000 ahead of the two 16-bit multiplies. The gfx900 run expects it to
; be folded into v_pk_mul_lo_u16 as neg_lo:[1,0] neg_hi:[1,0].
; NOTE(review): the gfx900 expectation applies neg_lo/neg_hi source modifiers
; to an integer packed multiply. Confirm the hardware honours these modifiers
; on v_pk_mul_lo_u16; if it does not, these assertions pin a miscompile.
23define <2 x i16> @v_mul_v2i16_fneg_lhs(<2 x half> %a, <2 x i16> %b) {
24; GFX9-LABEL: v_mul_v2i16_fneg_lhs:
25; GFX9:       ; %bb.0:
26; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
27; GFX9-NEXT:    v_pk_mul_lo_u16 v0, v0, v1 neg_lo:[1,0] neg_hi:[1,0]
28; GFX9-NEXT:    s_setpc_b64 s[30:31]
29;
30; GFX8-LABEL: v_mul_v2i16_fneg_lhs:
31; GFX8:       ; %bb.0:
32; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
33; GFX8-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
34; GFX8-NEXT:    v_mul_lo_u16_e32 v2, v0, v1
35; GFX8-NEXT:    v_mul_lo_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
36; GFX8-NEXT:    v_or_b32_e32 v0, v2, v0
37; GFX8-NEXT:    s_setpc_b64 s[30:31]
  ; Negate in the half domain, then reinterpret the same bits as i16 so the
  ; negated value feeds an integer operation.
38  %neg.a = fneg <2 x half> %a
39  %cast.neg.a = bitcast <2 x half> %neg.a to <2 x i16>
40  %mul = mul <2 x i16> %cast.neg.a, %b
41  ret <2 x i16> %mul
42}
43
; Mirror of the left-operand case: here the right operand arrives as
; <2 x half>, is negated with fneg, bitcast to <2 x i16>, and multiplied by %a.
; The fiji run expects an explicit v_xor_b32 of v1 with 0x80008000 ahead of the
; two 16-bit multiplies. The gfx900 run expects the negate to be folded into
; v_pk_mul_lo_u16 as neg_lo:[0,1] neg_hi:[0,1].
; NOTE(review): the gfx900 expectation applies neg_lo/neg_hi source modifiers
; to an integer packed multiply. Confirm the hardware honours these modifiers
; on v_pk_mul_lo_u16; if it does not, these assertions pin a miscompile.
44define <2 x i16> @v_mul_v2i16_fneg_rhs(<2 x i16> %a, <2 x half> %b) {
45; GFX9-LABEL: v_mul_v2i16_fneg_rhs:
46; GFX9:       ; %bb.0:
47; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
48; GFX9-NEXT:    v_pk_mul_lo_u16 v0, v0, v1 neg_lo:[0,1] neg_hi:[0,1]
49; GFX9-NEXT:    s_setpc_b64 s[30:31]
50;
51; GFX8-LABEL: v_mul_v2i16_fneg_rhs:
52; GFX8:       ; %bb.0:
53; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
54; GFX8-NEXT:    v_xor_b32_e32 v1, 0x80008000, v1
55; GFX8-NEXT:    v_mul_lo_u16_e32 v2, v0, v1
56; GFX8-NEXT:    v_mul_lo_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
57; GFX8-NEXT:    v_or_b32_e32 v0, v2, v0
58; GFX8-NEXT:    s_setpc_b64 s[30:31]
  ; Negate in the half domain, then reinterpret the same bits as i16 so the
  ; negated value feeds an integer operation.
59  %neg.b = fneg <2 x half> %b
60  %cast.neg.b = bitcast <2 x half> %neg.b to <2 x i16>
61  %mul = mul <2 x i16> %a, %cast.neg.b
62  ret <2 x i16> %mul
63}
64
; Both operands arrive as <2 x half>; each is negated with fneg, bitcast to
; <2 x i16>, and the two results are multiplied as integers.
; The fiji run expects the 0x80008000 mask to be placed in s4 once with
; s_mov_b32 and then xor'd into v0 and v1 before the two 16-bit multiplies.
; The gfx900 run expects both negates to be folded into v_pk_mul_lo_u16 as
; neg_lo:[1,1] neg_hi:[1,1].
; NOTE(review): the gfx900 expectation applies neg_lo/neg_hi source modifiers
; to an integer packed multiply. Confirm the hardware honours these modifiers
; on v_pk_mul_lo_u16; if it does not, these assertions pin a miscompile.
65define <2 x i16> @v_mul_v2i16_fneg_lhs_fneg_rhs(<2 x half> %a, <2 x half> %b) {
66; GFX9-LABEL: v_mul_v2i16_fneg_lhs_fneg_rhs:
67; GFX9:       ; %bb.0:
68; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
69; GFX9-NEXT:    v_pk_mul_lo_u16 v0, v0, v1 neg_lo:[1,1] neg_hi:[1,1]
70; GFX9-NEXT:    s_setpc_b64 s[30:31]
71;
72; GFX8-LABEL: v_mul_v2i16_fneg_lhs_fneg_rhs:
73; GFX8:       ; %bb.0:
74; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
75; GFX8-NEXT:    s_mov_b32 s4, 0x80008000
76; GFX8-NEXT:    v_xor_b32_e32 v0, s4, v0
77; GFX8-NEXT:    v_xor_b32_e32 v1, s4, v1
78; GFX8-NEXT:    v_mul_lo_u16_e32 v2, v0, v1
79; GFX8-NEXT:    v_mul_lo_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
80; GFX8-NEXT:    v_or_b32_e32 v0, v2, v0
81; GFX8-NEXT:    s_setpc_b64 s[30:31]
  ; Negate both inputs in the half domain, then reinterpret the same bits as
  ; i16 so the negated values feed an integer operation.
82  %neg.a = fneg <2 x half> %a
83  %neg.b = fneg <2 x half> %b
84  %cast.neg.a = bitcast <2 x half> %neg.a to <2 x i16>
85  %cast.neg.b = bitcast <2 x half> %neg.b to <2 x i16>
86  %mul = mul <2 x i16> %cast.neg.a, %cast.neg.b
87  ret <2 x i16> %mul
88}
89