; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s

define <2 x i16> @v_add_v2i16(<2 x i16> %a, <2 x i16> %b) {
; GFX9-LABEL: v_add_v2i16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_add_u16 v0, v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_add_v2i16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_add_u16_e32 v2, v0, v1
; GFX8-NEXT:    v_add_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_or_b32_e32 v0, v2, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
  %add = add <2 x i16> %a, %b ; GFX9 selects a single packed add; GFX8 splits lo/hi halves
  ret <2 x i16> %add
}

define <2 x i16> @v_add_v2i16_fneg_lhs(<2 x half> %a, <2 x i16> %b) {
; GFX9-LABEL: v_add_v2i16_fneg_lhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_add_u16 v0, v0, v1 neg_lo:[1,0] neg_hi:[1,0]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_add_v2i16_fneg_lhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
; GFX8-NEXT:    v_add_u16_e32 v2, v0, v1
; GFX8-NEXT:    v_add_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_or_b32_e32 v0, v2, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
  %neg.a = fneg <2 x half> %a ; GFX9 folds the fneg into neg_lo/neg_hi modifiers; GFX8 materializes an xor with 0x80008000
  %cast.neg.a = bitcast <2 x half> %neg.a to <2 x i16>
  %add = add <2 x i16> %cast.neg.a, %b
  ret <2 x i16> %add
}

define <2 x i16> @v_add_v2i16_fneg_rhs(<2 x i16> %a, <2 x half> %b) {
; GFX9-LABEL: v_add_v2i16_fneg_rhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_add_u16 v0, v0, v1 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_add_v2i16_fneg_rhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_xor_b32_e32 v1, 0x80008000, v1
; GFX8-NEXT:    v_add_u16_e32 v2, v0, v1
; GFX8-NEXT:    v_add_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_or_b32_e32 v0, v2, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
  %neg.b = fneg <2 x half> %b ; GFX9 folds the fneg into the second source's neg_lo/neg_hi modifiers
  %cast.neg.b = bitcast <2 x half> %neg.b to <2 x i16>
  %add = add <2 x i16> %a, %cast.neg.b
  ret <2 x i16> %add
}

define <2 x i16> @v_add_v2i16_fneg_lhs_fneg_rhs(<2 x half> %a, <2 x half> %b) {
; GFX9-LABEL: v_add_v2i16_fneg_lhs_fneg_rhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_add_u16 v0, v0, v1 neg_lo:[1,1] neg_hi:[1,1]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_add_v2i16_fneg_lhs_fneg_rhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    s_mov_b32 s4, 0x80008000
; GFX8-NEXT:    v_xor_b32_e32 v0, s4, v0
; GFX8-NEXT:    v_xor_b32_e32 v1, s4, v1
; GFX8-NEXT:    v_add_u16_e32 v2, v0, v1
; GFX8-NEXT:    v_add_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_or_b32_e32 v0, v2, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
  %neg.a = fneg <2 x half> %a ; both fnegs fold into neg modifiers on GFX9; GFX8 reuses one 0x80008000 constant for both xors
  %neg.b = fneg <2 x half> %b
  %cast.neg.a = bitcast <2 x half> %neg.a to <2 x i16>
  %cast.neg.b = bitcast <2 x half> %neg.b to <2 x i16>
  %add = add <2 x i16> %cast.neg.a, %cast.neg.b
  ret <2 x i16> %add
}

define <2 x i16> @v_add_v2i16_neg_inline_imm_splat(<2 x i16> %a) {
; GFX9-LABEL: v_add_v2i16_neg_inline_imm_splat:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v1, 0xffc0ffc0
; GFX9-NEXT:    v_pk_add_u16 v0, v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_add_v2i16_neg_inline_imm_splat:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    s_movk_i32 s4, 0xffc0
; GFX8-NEXT:    v_mov_b32_e32 v2, s4
; GFX8-NEXT:    v_add_u16_e32 v1, s4, v0
; GFX8-NEXT:    v_add_u16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
  %add = add <2 x i16> %a, <i16 -64, i16 -64> ; splat -64 (0xffc0 per half) is materialized as the 32-bit constant 0xffc0ffc0 on GFX9
  ret <2 x i16> %add
}

define <2 x i16> @v_add_v2i16_neg_inline_imm_lo(<2 x i16> %a) {
; GFX9-LABEL: v_add_v2i16_neg_inline_imm_lo:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v1, 0x4ffc0
; GFX9-NEXT:    v_pk_add_u16 v0, v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_add_v2i16_neg_inline_imm_lo:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v2, 4
; GFX8-NEXT:    v_add_u16_e32 v1, 0xffc0, v0
; GFX8-NEXT:    v_add_u16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
  %add = add <2 x i16> %a, <i16 -64, i16 4> ; -64 in the low half, 4 in the high half -> packed constant 0x0004ffc0
  ret <2 x i16> %add
}

define <2 x i16> @v_add_v2i16_neg_inline_imm_hi(<2 x i16> %a) {
; GFX9-LABEL: v_add_v2i16_neg_inline_imm_hi:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v1, 0xffc00004
; GFX9-NEXT:    v_pk_add_u16 v0, v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_add_v2i16_neg_inline_imm_hi:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v1, 0xffffffc0
; GFX8-NEXT:    v_add_u16_e32 v2, 4, v0
; GFX8-NEXT:    v_add_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT:    v_or_b32_e32 v0, v2, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
  %add = add <2 x i16> %a, <i16 4, i16 -64> ; 4 in the low half, -64 in the high half -> packed constant 0xffc00004
  ret <2 x i16> %add
}

define amdgpu_ps i32 @s_add_v2i16_neg_inline_imm_splat(<2 x i16> inreg %a) {
; GFX9-LABEL: s_add_v2i16_neg_inline_imm_splat:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_lshr_b32 s1, s0, 16
; GFX9-NEXT:    s_add_i32 s0, s0, 0xffc0ffc0
; GFX9-NEXT:    s_add_i32 s1, s1, 0xffc0
; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s1
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_add_v2i16_neg_inline_imm_splat:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_mov_b32 s3, 0xffff
; GFX8-NEXT:    s_lshr_b32 s2, s0, 16
; GFX8-NEXT:    s_mov_b32 s1, 0xffc0
; GFX8-NEXT:    s_and_b32 s0, s0, s3
; GFX8-NEXT:    s_add_i32 s0, s0, s1
; GFX8-NEXT:    s_add_i32 s2, s2, s1
; GFX8-NEXT:    s_lshl_b32 s1, s2, 16
; GFX8-NEXT:    s_and_b32 s0, s0, s3
; GFX8-NEXT:    s_or_b32 s0, s1, s0
; GFX8-NEXT:    ; return to shader part epilog
  %add = add <2 x i16> %a, <i16 -64, i16 -64> ; inreg operand: scalarized into s_add_i32 on the lo/hi halves
  %cast = bitcast <2 x i16> %add to i32
  ret i32 %cast
}

define amdgpu_ps i32 @s_add_v2i16_neg_inline_imm_lo(<2 x i16> inreg %a) {
; GFX9-LABEL: s_add_v2i16_neg_inline_imm_lo:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_lshr_b32 s1, s0, 16
; GFX9-NEXT:    s_add_i32 s0, s0, 0x4ffc0
; GFX9-NEXT:    s_add_i32 s1, s1, 4
; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s1
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_add_v2i16_neg_inline_imm_lo:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_mov_b32 s2, 0xffff
; GFX8-NEXT:    s_lshr_b32 s1, s0, 16
; GFX8-NEXT:    s_and_b32 s0, s0, s2
; GFX8-NEXT:    s_add_i32 s0, s0, 0xffc0
; GFX8-NEXT:    s_add_i32 s1, s1, 4
; GFX8-NEXT:    s_lshl_b32 s1, s1, 16
; GFX8-NEXT:    s_and_b32 s0, s0, s2
; GFX8-NEXT:    s_or_b32 s0, s1, s0
; GFX8-NEXT:    ; return to shader part epilog
  %add = add <2 x i16> %a, <i16 -64, i16 4> ; scalar path: lo gets 0xffc0, hi gets 4, repacked with s_pack_ll_b32_b16 on GFX9
  %cast = bitcast <2 x i16> %add to i32
  ret i32 %cast
}

define amdgpu_ps i32 @s_add_v2i16_neg_inline_imm_hi(<2 x i16> inreg %a) {
; GFX9-LABEL: s_add_v2i16_neg_inline_imm_hi:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_lshr_b32 s1, s0, 16
; GFX9-NEXT:    s_add_i32 s0, s0, 0xffc00004
; GFX9-NEXT:    s_add_i32 s1, s1, 0xffc0
; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s1
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_add_v2i16_neg_inline_imm_hi:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_mov_b32 s2, 0xffff
; GFX8-NEXT:    s_lshr_b32 s1, s0, 16
; GFX8-NEXT:    s_and_b32 s0, s0, s2
; GFX8-NEXT:    s_add_i32 s0, s0, 4
; GFX8-NEXT:    s_add_i32 s1, s1, 0xffc0
; GFX8-NEXT:    s_lshl_b32 s1, s1, 16
; GFX8-NEXT:    s_and_b32 s0, s0, s2
; GFX8-NEXT:    s_or_b32 s0, s1, s0
; GFX8-NEXT:    ; return to shader part epilog
  %add = add <2 x i16> %a, <i16 4, i16 -64> ; scalar path: lo gets 4, hi gets 0xffc0
  %cast = bitcast <2 x i16> %add to i32
  ret i32 %cast
}

define amdgpu_ps i32 @s_add_v2i16(<2 x i16> inreg %a, <2 x i16> inreg %b) {
; GFX9-LABEL: s_add_v2i16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_lshr_b32 s2, s0, 16
; GFX9-NEXT:    s_lshr_b32 s3, s1, 16
; GFX9-NEXT:    s_add_i32 s0, s0, s1
; GFX9-NEXT:    s_add_i32 s2, s2, s3
; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s2
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_add_v2i16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_mov_b32 s3, 0xffff
; GFX8-NEXT:    s_lshr_b32 s2, s0, 16
; GFX8-NEXT:    s_lshr_b32 s4, s1, 16
; GFX8-NEXT:    s_and_b32 s0, s0, s3
; GFX8-NEXT:    s_and_b32 s1, s1, s3
; GFX8-NEXT:    s_add_i32 s0, s0, s1
; GFX8-NEXT:    s_add_i32 s2, s2, s4
; GFX8-NEXT:    s_lshl_b32 s1, s2, 16
; GFX8-NEXT:    s_and_b32 s0, s0, s3
; GFX8-NEXT:    s_or_b32 s0, s1, s0
; GFX8-NEXT:    ; return to shader part epilog
  %add = add <2 x i16> %a, %b ; SGPR add: halves are split, added with s_add_i32, and repacked
  %cast = bitcast <2 x i16> %add to i32
  ret i32 %cast
}

define amdgpu_ps i32 @s_add_v2i16_fneg_lhs(<2 x half> inreg %a, <2 x i16> inreg %b) {
; GFX9-LABEL: s_add_v2i16_fneg_lhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_xor_b32 s0, s0, 0x80008000
; GFX9-NEXT:    s_lshr_b32 s2, s0, 16
; GFX9-NEXT:    s_lshr_b32 s3, s1, 16
; GFX9-NEXT:    s_add_i32 s0, s0, s1
; GFX9-NEXT:    s_add_i32 s2, s2, s3
; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s2
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_add_v2i16_fneg_lhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_xor_b32 s0, s0, 0x80008000
; GFX8-NEXT:    s_mov_b32 s3, 0xffff
; GFX8-NEXT:    s_lshr_b32 s2, s0, 16
; GFX8-NEXT:    s_lshr_b32 s4, s1, 16
; GFX8-NEXT:    s_and_b32 s0, s0, s3
; GFX8-NEXT:    s_and_b32 s1, s1, s3
; GFX8-NEXT:    s_add_i32 s0, s0, s1
; GFX8-NEXT:    s_add_i32 s2, s2, s4
; GFX8-NEXT:    s_lshl_b32 s1, s2, 16
; GFX8-NEXT:    s_and_b32 s0, s0, s3
; GFX8-NEXT:    s_or_b32 s0, s1, s0
; GFX8-NEXT:    ; return to shader part epilog
  %neg.a = fneg <2 x half> %a ; on the scalar path the fneg becomes an s_xor_b32 with 0x80008000 on both targets
  %cast.neg.a = bitcast <2 x half> %neg.a to <2 x i16>
  %add = add <2 x i16> %cast.neg.a, %b
  %cast = bitcast <2 x i16> %add to i32
  ret i32 %cast
}

define amdgpu_ps i32 @s_add_v2i16_fneg_rhs(<2 x i16> inreg %a, <2 x half> inreg %b) {
; GFX9-LABEL: s_add_v2i16_fneg_rhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_xor_b32 s1, s1, 0x80008000
; GFX9-NEXT:    s_lshr_b32 s2, s0, 16
; GFX9-NEXT:    s_lshr_b32 s3, s1, 16
; GFX9-NEXT:    s_add_i32 s0, s0, s1
; GFX9-NEXT:    s_add_i32 s2, s2, s3
; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s2
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_add_v2i16_fneg_rhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_xor_b32 s1, s1, 0x80008000
; GFX8-NEXT:    s_mov_b32 s3, 0xffff
; GFX8-NEXT:    s_lshr_b32 s2, s0, 16
; GFX8-NEXT:    s_lshr_b32 s4, s1, 16
; GFX8-NEXT:    s_and_b32 s0, s0, s3
; GFX8-NEXT:    s_and_b32 s1, s1, s3
; GFX8-NEXT:    s_add_i32 s0, s0, s1
; GFX8-NEXT:    s_add_i32 s2, s2, s4
; GFX8-NEXT:    s_lshl_b32 s1, s2, 16
; GFX8-NEXT:    s_and_b32 s0, s0, s3
; GFX8-NEXT:    s_or_b32 s0, s1, s0
; GFX8-NEXT:    ; return to shader part epilog
  %neg.b = fneg <2 x half> %b ; fneg of the RHS becomes s_xor_b32 s1 with 0x80008000
  %cast.neg.b = bitcast <2 x half> %neg.b to <2 x i16>
  %add = add <2 x i16> %a, %cast.neg.b
  %cast = bitcast <2 x i16> %add to i32
  ret i32 %cast
}

define amdgpu_ps i32 @s_add_v2i16_fneg_lhs_fneg_rhs(<2 x half> inreg %a, <2 x half> inreg %b) {
; GFX9-LABEL: s_add_v2i16_fneg_lhs_fneg_rhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_mov_b32 s2, 0x80008000
; GFX9-NEXT:    s_xor_b32 s1, s1, s2
; GFX9-NEXT:    s_xor_b32 s0, s0, s2
; GFX9-NEXT:    s_lshr_b32 s2, s0, 16
; GFX9-NEXT:    s_lshr_b32 s3, s1, 16
; GFX9-NEXT:    s_add_i32 s0, s0, s1
; GFX9-NEXT:    s_add_i32 s2, s2, s3
; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s2
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_add_v2i16_fneg_lhs_fneg_rhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_mov_b32 s2, 0x80008000
; GFX8-NEXT:    s_xor_b32 s1, s1, s2
; GFX8-NEXT:    s_xor_b32 s0, s0, s2
; GFX8-NEXT:    s_mov_b32 s3, 0xffff
; GFX8-NEXT:    s_lshr_b32 s2, s0, 16
; GFX8-NEXT:    s_lshr_b32 s4, s1, 16
; GFX8-NEXT:    s_and_b32 s0, s0, s3
; GFX8-NEXT:    s_and_b32 s1, s1, s3
; GFX8-NEXT:    s_add_i32 s0, s0, s1
; GFX8-NEXT:    s_add_i32 s2, s2, s4
; GFX8-NEXT:    s_lshl_b32 s1, s2, 16
; GFX8-NEXT:    s_and_b32 s0, s0, s3
; GFX8-NEXT:    s_or_b32 s0, s1, s0
; GFX8-NEXT:    ; return to shader part epilog
  %neg.a = fneg <2 x half> %a ; both fnegs lower to xors sharing one materialized 0x80008000 constant
  %neg.b = fneg <2 x half> %b
  %cast.neg.a = bitcast <2 x half> %neg.a to <2 x i16>
  %cast.neg.b = bitcast <2 x half> %neg.b to <2 x i16>
  %add = add <2 x i16> %cast.neg.a, %cast.neg.b
  %cast = bitcast <2 x i16> %add to i32
  ret i32 %cast
}
