• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX906 %s
3; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1011 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s
4; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s
5
6define float @v_fdot2(<2 x half> %a, <2 x half> %b, float %c) {
7; GFX906-LABEL: v_fdot2:
8; GFX906:       ; %bb.0:
9; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10; GFX906-NEXT:    v_dot2_f32_f16 v0, v0, v1, v2
11; GFX906-NEXT:    s_setpc_b64 s[30:31]
12;
13; GFX10-LABEL: v_fdot2:
14; GFX10:       ; %bb.0:
15; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
17; GFX10-NEXT:    v_dot2_f32_f16 v0, v0, v1, v2
18; GFX10-NEXT:    s_setpc_b64 s[30:31]
19  %r = call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %b, float %c, i1 false)
20  ret float %r
21}
22
23define float @v_fdot2_clamp(<2 x half> %a, <2 x half> %b, float %c) {
24; GFX906-LABEL: v_fdot2_clamp:
25; GFX906:       ; %bb.0:
26; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
27; GFX906-NEXT:    v_dot2_f32_f16 v0, v0, v1, v2 clamp
28; GFX906-NEXT:    s_setpc_b64 s[30:31]
29;
30; GFX10-LABEL: v_fdot2_clamp:
31; GFX10:       ; %bb.0:
32; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
33; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
34; GFX10-NEXT:    v_dot2_f32_f16 v0, v0, v1, v2 clamp
35; GFX10-NEXT:    s_setpc_b64 s[30:31]
36  %r = call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %b, float %c, i1 true)
37  ret float %r
38}
39
40define float @v_fdot2_neg_a(<2 x half> %a, <2 x half> %b, float %c) {
41; GFX906-LABEL: v_fdot2_neg_a:
42; GFX906:       ; %bb.0:
43; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
44; GFX906-NEXT:    v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[1,0,0] neg_hi:[1,0,0]
45; GFX906-NEXT:    s_setpc_b64 s[30:31]
46;
47; GFX10-LABEL: v_fdot2_neg_a:
48; GFX10:       ; %bb.0:
49; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
50; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
51; GFX10-NEXT:    v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[1,0,0] neg_hi:[1,0,0]
52; GFX10-NEXT:    s_setpc_b64 s[30:31]
53  %neg.a = fneg <2 x half> %a
54  %r = call float @llvm.amdgcn.fdot2(<2 x half> %neg.a, <2 x half> %b, float %c, i1 false)
55  ret float %r
56}
57
58define float @v_fdot2_neg_b(<2 x half> %a, <2 x half> %b, float %c) {
59; GFX906-LABEL: v_fdot2_neg_b:
60; GFX906:       ; %bb.0:
61; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
62; GFX906-NEXT:    v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[0,1,0] neg_hi:[0,1,0]
63; GFX906-NEXT:    s_setpc_b64 s[30:31]
64;
65; GFX10-LABEL: v_fdot2_neg_b:
66; GFX10:       ; %bb.0:
67; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
68; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
69; GFX10-NEXT:    v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[0,1,0] neg_hi:[0,1,0]
70; GFX10-NEXT:    s_setpc_b64 s[30:31]
71  %neg.b = fneg <2 x half> %b
72  %r = call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %neg.b, float %c, i1 false)
73  ret float %r
74}
75
76define float @v_fdot2_neg_a_neg_b(<2 x half> %a, <2 x half> %b, float %c) {
77; GFX906-LABEL: v_fdot2_neg_a_neg_b:
78; GFX906:       ; %bb.0:
79; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
80; GFX906-NEXT:    v_dot2_f32_f16 v0, v1, v1, v2 neg_lo:[1,1,0] neg_hi:[1,1,0]
81; GFX906-NEXT:    s_setpc_b64 s[30:31]
82;
83; GFX10-LABEL: v_fdot2_neg_a_neg_b:
84; GFX10:       ; %bb.0:
85; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
86; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
87; GFX10-NEXT:    v_dot2_f32_f16 v0, v1, v1, v2 neg_lo:[1,1,0] neg_hi:[1,1,0]
88; GFX10-NEXT:    s_setpc_b64 s[30:31]
89  %neg.a = fneg <2 x half> %b
90  %neg.b = fneg <2 x half> %b
91  %r = call float @llvm.amdgcn.fdot2(<2 x half> %neg.a, <2 x half> %neg.b, float %c, i1 false)
92  ret float %r
93}
94
95define float @v_fdot2_neg_c(<2 x half> %a, <2 x half> %b, float %c) {
96; GFX906-LABEL: v_fdot2_neg_c:
97; GFX906:       ; %bb.0:
98; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
99; GFX906-NEXT:    v_xor_b32_e32 v2, 0x80000000, v2
100; GFX906-NEXT:    v_dot2_f32_f16 v0, v0, v1, v2
101; GFX906-NEXT:    s_setpc_b64 s[30:31]
102;
103; GFX10-LABEL: v_fdot2_neg_c:
104; GFX10:       ; %bb.0:
105; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
106; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
107; GFX10-NEXT:    v_xor_b32_e32 v2, 0x80000000, v2
108; GFX10-NEXT:    v_dot2_f32_f16 v0, v0, v1, v2
109; GFX10-NEXT:    s_setpc_b64 s[30:31]
110  %neg.c = fneg float %c
111  %r = call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %b, float %neg.c, i1 false)
112  ret float %r
113}
114
115define float @v_fdot2_inline_literal_a(<2 x half> %b, float %c) {
116; GFX906-LABEL: v_fdot2_inline_literal_a:
117; GFX906:       ; %bb.0:
118; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
119; GFX906-NEXT:    s_movk_i32 s4, 0x4000
120; GFX906-NEXT:    s_pack_ll_b32_b16 s4, s4, s4
121; GFX906-NEXT:    v_dot2_f32_f16 v0, s4, v0, v1
122; GFX906-NEXT:    s_setpc_b64 s[30:31]
123;
124; GFX10-LABEL: v_fdot2_inline_literal_a:
125; GFX10:       ; %bb.0:
126; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
127; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
128; GFX10-NEXT:    s_movk_i32 s4, 0x4000
129; GFX10-NEXT:    s_pack_ll_b32_b16 s4, s4, s4
130; GFX10-NEXT:    v_dot2_f32_f16 v0, s4, v0, v1
131; GFX10-NEXT:    s_setpc_b64 s[30:31]
132  %ret = tail call float @llvm.amdgcn.fdot2(<2 x half> <half 2.0, half 2.0>, <2 x half> %b, float %c, i1 false)
133  ret float %ret
134}
135
136define float @v_fdot2_inline_literal_b(<2 x half> %a, float %c) {
137; GFX906-LABEL: v_fdot2_inline_literal_b:
138; GFX906:       ; %bb.0:
139; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
140; GFX906-NEXT:    s_movk_i32 s4, 0x4000
141; GFX906-NEXT:    s_pack_ll_b32_b16 s4, s4, s4
142; GFX906-NEXT:    v_dot2_f32_f16 v0, v0, s4, v1
143; GFX906-NEXT:    s_setpc_b64 s[30:31]
144;
145; GFX10-LABEL: v_fdot2_inline_literal_b:
146; GFX10:       ; %bb.0:
147; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
148; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
149; GFX10-NEXT:    s_movk_i32 s4, 0x4000
150; GFX10-NEXT:    s_pack_ll_b32_b16 s4, s4, s4
151; GFX10-NEXT:    v_dot2_f32_f16 v0, v0, s4, v1
152; GFX10-NEXT:    s_setpc_b64 s[30:31]
153  %ret = tail call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> <half 2.0, half 2.0>, float %c, i1 false)
154  ret float %ret
155}
156
157define float @v_fdot2_inline_literal_c(<2 x half> %a, <2 x half> %b) {
158; GFX906-LABEL: v_fdot2_inline_literal_c:
159; GFX906:       ; %bb.0:
160; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
161; GFX906-NEXT:    v_dot2_f32_f16 v0, v0, v1, 1.0
162; GFX906-NEXT:    s_setpc_b64 s[30:31]
163;
164; GFX10-LABEL: v_fdot2_inline_literal_c:
165; GFX10:       ; %bb.0:
166; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
167; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
168; GFX10-NEXT:    v_dot2_f32_f16 v0, v0, v1, 1.0
169; GFX10-NEXT:    s_setpc_b64 s[30:31]
170  %ret = tail call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %b, float 1.0, i1 false)
171  ret float %ret
172}
173
174declare float @llvm.amdgcn.fdot2(<2 x half>, <2 x half>, float, i1 immarg) #0
175
176attributes #0 = { nounwind readnone speculatable }
177