; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx704 < %s | FileCheck -check-prefix=GFX7 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s

; Add-with-carry chain whose second overflow bit is consumed by a select.
;
; The body loads a 32-bit value, doubles it, turns the unsigned-overflow bit of
; that add into a 0/1 carry, adds the carry to the loaded value, and then uses
; the overflow bit of this second add (%i6) as the condition of a select. The
; returned value is chosen between the doubled value and that select result.
;
; The assertion lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define i32 @s_add_co_select_user() {
; GFX7-LABEL: s_add_co_select_user:
; GFX7:       ; %bb.0: ; %bb
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NEXT:    s_load_dword s6, s[4:5], 0x0
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_add_i32_e64 v0, s[4:5], s6, s6
; GFX7-NEXT:    s_or_b32 s4, s4, s5
; GFX7-NEXT:    s_cmp_lg_u32 s4, 0
; GFX7-NEXT:    s_addc_u32 s4, s6, 0
; GFX7-NEXT:    v_mov_b32_e32 v1, s4
; GFX7-NEXT:    s_cselect_b64 vcc, 1, 0
; GFX7-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
; GFX7-NEXT:    v_cmp_gt_u32_e64 vcc, s6, 31
; GFX7-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_add_co_select_user:
; GFX9:       ; %bb.0: ; %bb
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_mov_b64 s[4:5], 0
; GFX9-NEXT:    s_load_dword s6, s[4:5], 0x0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e64 v0, s[4:5], s6, s6
; GFX9-NEXT:    s_cmp_lg_u64 s[4:5], 0
; GFX9-NEXT:    s_addc_u32 s4, s6, 0
; GFX9-NEXT:    s_cselect_b64 vcc, 1, 0
; GFX9-NEXT:    v_mov_b32_e32 v1, s4
; GFX9-NEXT:    s_cmp_gt_u32 s6, 31
; GFX9-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
; GFX9-NEXT:    s_cselect_b64 vcc, -1, 0
; GFX9-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_add_co_select_user:
; GFX10:       ; %bb.0: ; %bb
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    s_mov_b64 s[4:5], 0
; GFX10-NEXT:    s_load_dword s4, s[4:5], 0x0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_add_co_u32_e64 v0, s5, s4, s4
; GFX10-NEXT:    s_cmpk_lg_u32 s5, 0x0
; GFX10-NEXT:    s_addc_u32 s5, s4, 0
; GFX10-NEXT:    s_cselect_b32 s6, 1, 0
; GFX10-NEXT:    s_cmp_gt_u32 s4, 31
; GFX10-NEXT:    v_cndmask_b32_e64 v1, 0, s5, s6
; GFX10-NEXT:    s_cselect_b32 vcc_lo, -1, 0
; GFX10-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; GFX10-NEXT:    s_setpc_b64 s[30:31]
bb:
  %i = load volatile i32, i32 addrspace(4)* null, align 8 ; input value, read through a null addrspace(4) pointer
  %i1 = add i32 %i, %i                                    ; first add: %i + %i
  %i2 = icmp ult i32 %i1, %i                              ; true when the first add wrapped (unsigned)
  %i3 = zext i1 %i2 to i32                                ; carry of the first add as 0 or 1
  %i4 = add nuw nsw i32 %i3, 0                            ; carry + 0
  %i5 = add i32 %i4, %i                                   ; second add: carry + %i
  %i6 = icmp ult i32 %i5, %i4                             ; true when the second add wrapped (unsigned)
  %i7 = select i1 %i6, i32 %i5, i32 0                     ; select user of the second overflow bit
  %i8 = icmp ugt i32 %i, 31
  %i9 = select i1 %i8, i32 %i1, i32 %i7                   ; doubled value if %i > 31, else %i7
  ret i32 %i9
}
;
; Add-with-carry chain whose second overflow bit is consumed by a branch.
;
; The kernel doubles its i32 argument, turns the unsigned-overflow bit of that
; add into a 0/1 carry, adds the carry to the argument, and branches on the
; overflow bit of this second add (%i6). Block %bb0 stores 9 and falls into
; %bb1, which stores 10; both stores are volatile and go through a null
; addrspace(1) pointer.
;
; NOTE(review): %i7 has no users in this function. It is kept as-is because the
; assertion lines are autogenerated from this exact IR; regenerate them with
; utils/update_llc_test_checks.py if the IR is ever changed.
define amdgpu_kernel void @s_add_co_br_user(i32 %i) {
; GFX7-LABEL: s_add_co_br_user:
; GFX7:       ; %bb.0: ; %bb
; GFX7-NEXT:    s_load_dword s0, s[4:5], 0x0
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    s_add_i32 s1, s0, s0
; GFX7-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-NEXT:    v_cmp_lt_u32_e32 vcc, s1, v0
; GFX7-NEXT:    s_or_b32 s1, vcc_lo, vcc_hi
; GFX7-NEXT:    s_cmp_lg_u32 s1, 0
; GFX7-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX7-NEXT:    s_addc_u32 s0, s0, 0
; GFX7-NEXT:    v_cmp_ge_u32_e32 vcc, s0, v0
; GFX7-NEXT:    s_and_b64 vcc, exec, vcc
; GFX7-NEXT:    s_cbranch_vccnz BB1_2
; GFX7-NEXT:  ; %bb.1: ; %bb0
; GFX7-NEXT:    v_mov_b32_e32 v0, 0
; GFX7-NEXT:    v_mov_b32_e32 v2, 9
; GFX7-NEXT:    v_mov_b32_e32 v1, 0
; GFX7-NEXT:    flat_store_dword v[0:1], v2
; GFX7-NEXT:  BB1_2: ; %bb1
; GFX7-NEXT:    v_mov_b32_e32 v0, 0
; GFX7-NEXT:    v_mov_b32_e32 v2, 10
; GFX7-NEXT:    v_mov_b32_e32 v1, 0
; GFX7-NEXT:    flat_store_dword v[0:1], v2
; GFX7-NEXT:    s_endpgm
;
; GFX9-LABEL: s_add_co_br_user:
; GFX9:       ; %bb.0: ; %bb
; GFX9-NEXT:    s_load_dword s0, s[4:5], 0x0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_add_i32 s1, s0, s0
; GFX9-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s1, v0
; GFX9-NEXT:    s_cmp_lg_u64 vcc, 0
; GFX9-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX9-NEXT:    s_addc_u32 s0, s0, 0
; GFX9-NEXT:    v_cmp_ge_u32_e32 vcc, s0, v0
; GFX9-NEXT:    s_and_b64 vcc, exec, vcc
; GFX9-NEXT:    s_cbranch_vccnz BB1_2
; GFX9-NEXT:  ; %bb.1: ; %bb0
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    v_mov_b32_e32 v2, 9
; GFX9-NEXT:    v_mov_b32_e32 v1, 0
; GFX9-NEXT:    global_store_dword v[0:1], v2, off
; GFX9-NEXT:  BB1_2: ; %bb1
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    v_mov_b32_e32 v2, 10
; GFX9-NEXT:    v_mov_b32_e32 v1, 0
; GFX9-NEXT:    global_store_dword v[0:1], v2, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: s_add_co_br_user:
; GFX10:       ; %bb.0: ; %bb
; GFX10-NEXT:    s_load_dword s0, s[4:5], 0x0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_add_i32 s1, s0, s0
; GFX10-NEXT:    v_cmp_lt_u32_e64 s1, s1, s0
; GFX10-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s1
; GFX10-NEXT:    s_cmpk_lg_u32 s1, 0x0
; GFX10-NEXT:    s_addc_u32 s0, s0, 0
; GFX10-NEXT:    v_cmp_ge_u32_e32 vcc_lo, s0, v0
; GFX10-NEXT:    s_and_b32 vcc_lo, exec_lo, vcc_lo
; GFX10-NEXT:    s_cbranch_vccnz BB1_2
; GFX10-NEXT:  ; %bb.1: ; %bb0
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    v_mov_b32_e32 v2, 9
; GFX10-NEXT:    v_mov_b32_e32 v1, 0
; GFX10-NEXT:    global_store_dword v[0:1], v2, off
; GFX10-NEXT:  BB1_2: ; %bb1
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    v_mov_b32_e32 v2, 10
; GFX10-NEXT:    v_mov_b32_e32 v1, 0
; GFX10-NEXT:    global_store_dword v[0:1], v2, off
; GFX10-NEXT:    s_endpgm
bb:
  %i1 = add i32 %i, %i                        ; first add: %i + %i
  %i2 = icmp ult i32 %i1, %i                  ; true when the first add wrapped (unsigned)
  %i3 = zext i1 %i2 to i32                    ; carry of the first add as 0 or 1
  %i4 = add nuw nsw i32 %i3, 0                ; carry + 0
  %i5 = add i32 %i4, %i                       ; second add: carry + %i
  %i6 = icmp ult i32 %i5, %i4                 ; true when the second add wrapped (unsigned)
  %i7 = select i1 %i6, i32 %i5, i32 0         ; not used below
  br i1 %i6, label %bb0, label %bb1           ; branch user of the second overflow bit

bb0:
  store volatile i32 9, i32 addrspace(1)* null
  br label %bb1

bb1:
  store volatile i32 10, i32 addrspace(1)* null
  ret void
}
;