• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdpal < %s | FileCheck -check-prefixes=CHECK,GISEL %s
3; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdpal < %s | FileCheck -check-prefixes=CHECK,CGP %s
4
5; The same 32-bit expansion is implemented in the legalizer and in AMDGPUCodeGenPrepare.
6
; Signed 32-bit remainder with both operands passed as ordinary function
; arguments (the expected code reads them from v0 and v1). The GISEL and CGP
; check blocks correspond to the two llc invocations at the top of the file:
; idiv expansion in AMDGPUCodeGenPrepare disabled and enabled, respectively.
7define i32 @v_srem_i32(i32 %num, i32 %den) {
8; GISEL-LABEL: v_srem_i32:
9; GISEL:       ; %bb.0:
10; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11; GISEL-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
12; GISEL-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
13; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
14; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
15; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
16; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v3
17; GISEL-NEXT:    v_cvt_f32_u32_e32 v3, v1
18; GISEL-NEXT:    v_sub_i32_e32 v4, vcc, 0, v1
19; GISEL-NEXT:    v_rcp_iflag_f32_e32 v3, v3
20; GISEL-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
21; GISEL-NEXT:    v_cvt_u32_f32_e32 v3, v3
22; GISEL-NEXT:    v_mul_lo_u32 v4, v4, v3
23; GISEL-NEXT:    v_mul_hi_u32 v4, v3, v4
24; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
25; GISEL-NEXT:    v_mul_hi_u32 v3, v0, v3
26; GISEL-NEXT:    v_mul_lo_u32 v3, v3, v1
27; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
28; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, v0, v1
29; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
30; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
31; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, v0, v1
32; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
33; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
34; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
35; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
36; GISEL-NEXT:    s_setpc_b64 s[30:31]
37;
38; CGP-LABEL: v_srem_i32:
39; CGP:       ; %bb.0:
40; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
41; CGP-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
42; CGP-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
43; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
44; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
45; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
46; CGP-NEXT:    v_xor_b32_e32 v1, v1, v3
47; CGP-NEXT:    v_cvt_f32_u32_e32 v3, v1
48; CGP-NEXT:    v_sub_i32_e32 v4, vcc, 0, v1
49; CGP-NEXT:    v_rcp_f32_e32 v3, v3
50; CGP-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
51; CGP-NEXT:    v_cvt_u32_f32_e32 v3, v3
52; CGP-NEXT:    v_mul_lo_u32 v4, v4, v3
53; CGP-NEXT:    v_mul_lo_u32 v5, 0, v4
54; CGP-NEXT:    v_mul_hi_u32 v4, v3, v4
55; CGP-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
56; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
57; CGP-NEXT:    v_mul_lo_u32 v4, 0, v3
58; CGP-NEXT:    v_mul_hi_u32 v3, v0, v3
59; CGP-NEXT:    v_add_i32_e32 v3, vcc, v4, v3
60; CGP-NEXT:    v_mul_lo_u32 v3, v3, v1
61; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
62; CGP-NEXT:    v_sub_i32_e32 v3, vcc, v0, v1
63; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
64; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
65; CGP-NEXT:    v_sub_i32_e32 v3, vcc, v0, v1
66; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
67; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
68; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
69; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
70; CGP-NEXT:    s_setpc_b64 s[30:31]
71  %result = srem i32 %num, %den
72  ret i32 %result
73}
74
75; FIXME: This is a workaround for not handling the uniform VGPR case.
; s_srem_i32 below passes its srem result through this intrinsic before
; returning it.
76declare i32 @llvm.amdgcn.readfirstlane(i32)
77
; Signed 32-bit remainder in an amdgpu_ps function whose operands are marked
; inreg; the expected code reads them from s0 and s1. The srem result is
; passed through llvm.amdgcn.readfirstlane before being returned, and the
; expected code for both configurations ends with v_readfirstlane_b32 into s0.
78define amdgpu_ps i32 @s_srem_i32(i32 inreg %num, i32 inreg %den) {
79; GISEL-LABEL: s_srem_i32:
80; GISEL:       ; %bb.0:
81; GISEL-NEXT:    s_ashr_i32 s2, s0, 31
82; GISEL-NEXT:    s_ashr_i32 s3, s1, 31
83; GISEL-NEXT:    s_add_i32 s0, s0, s2
84; GISEL-NEXT:    s_add_i32 s1, s1, s3
85; GISEL-NEXT:    s_xor_b32 s0, s0, s2
86; GISEL-NEXT:    s_xor_b32 s1, s1, s3
87; GISEL-NEXT:    v_cvt_f32_u32_e32 v0, s1
88; GISEL-NEXT:    s_sub_i32 s3, 0, s1
89; GISEL-NEXT:    v_rcp_iflag_f32_e32 v0, v0
90; GISEL-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
91; GISEL-NEXT:    v_cvt_u32_f32_e32 v0, v0
92; GISEL-NEXT:    v_mul_lo_u32 v1, s3, v0
93; GISEL-NEXT:    v_mul_hi_u32 v1, v0, v1
94; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
95; GISEL-NEXT:    v_mul_hi_u32 v0, s0, v0
96; GISEL-NEXT:    v_mul_lo_u32 v0, v0, s1
97; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, s0, v0
98; GISEL-NEXT:    v_subrev_i32_e32 v1, vcc, s1, v0
99; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s1, v0
100; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
101; GISEL-NEXT:    v_subrev_i32_e32 v1, vcc, s1, v0
102; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s1, v0
103; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
104; GISEL-NEXT:    v_xor_b32_e32 v0, s2, v0
105; GISEL-NEXT:    v_subrev_i32_e32 v0, vcc, s2, v0
106; GISEL-NEXT:    v_readfirstlane_b32 s0, v0
107; GISEL-NEXT:    ; return to shader part epilog
108;
109; CGP-LABEL: s_srem_i32:
110; CGP:       ; %bb.0:
111; CGP-NEXT:    s_ashr_i32 s2, s0, 31
112; CGP-NEXT:    s_ashr_i32 s3, s1, 31
113; CGP-NEXT:    s_add_i32 s0, s0, s2
114; CGP-NEXT:    s_add_i32 s1, s1, s3
115; CGP-NEXT:    s_xor_b32 s0, s0, s2
116; CGP-NEXT:    s_xor_b32 s1, s1, s3
117; CGP-NEXT:    v_cvt_f32_u32_e32 v0, s1
118; CGP-NEXT:    s_sub_i32 s3, 0, s1
119; CGP-NEXT:    v_rcp_f32_e32 v0, v0
120; CGP-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
121; CGP-NEXT:    v_cvt_u32_f32_e32 v0, v0
122; CGP-NEXT:    v_mul_lo_u32 v1, s3, v0
123; CGP-NEXT:    v_mul_lo_u32 v2, 0, v1
124; CGP-NEXT:    v_mul_hi_u32 v1, v0, v1
125; CGP-NEXT:    v_add_i32_e32 v1, vcc, v2, v1
126; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
127; CGP-NEXT:    v_mul_lo_u32 v1, 0, v0
128; CGP-NEXT:    v_mul_hi_u32 v0, s0, v0
129; CGP-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
130; CGP-NEXT:    v_mul_lo_u32 v0, v0, s1
131; CGP-NEXT:    v_sub_i32_e32 v0, vcc, s0, v0
132; CGP-NEXT:    v_subrev_i32_e32 v1, vcc, s1, v0
133; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s1, v0
134; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
135; CGP-NEXT:    v_subrev_i32_e32 v1, vcc, s1, v0
136; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s1, v0
137; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
138; CGP-NEXT:    v_xor_b32_e32 v0, s2, v0
139; CGP-NEXT:    v_subrev_i32_e32 v0, vcc, s2, v0
140; CGP-NEXT:    v_readfirstlane_b32 s0, v0
141; CGP-NEXT:    ; return to shader part epilog
142  %result = srem i32 %num, %den
143  %readlane = call i32 @llvm.amdgcn.readfirstlane(i32 %result)
144  ret i32 %readlane
145}
146
; Two-element vector form of the signed remainder test. In the expected code
; the numerator elements are in v0/v1 and the denominator elements in v2/v3,
; and the scalar sequence is emitted once per element with the two copies
; interleaved.
147define <2 x i32> @v_srem_v2i32(<2 x i32> %num, <2 x i32> %den) {
148; GISEL-LABEL: v_srem_v2i32:
149; GISEL:       ; %bb.0:
150; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
151; GISEL-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
152; GISEL-NEXT:    v_ashrrev_i32_e32 v5, 31, v2
153; GISEL-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
154; GISEL-NEXT:    v_ashrrev_i32_e32 v7, 31, v3
155; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
156; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
157; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v6
158; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
159; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
160; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v5
161; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v6
162; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v7
163; GISEL-NEXT:    v_cvt_f32_u32_e32 v5, v2
164; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, 0, v2
165; GISEL-NEXT:    v_cvt_f32_u32_e32 v8, v3
166; GISEL-NEXT:    v_sub_i32_e32 v9, vcc, 0, v3
167; GISEL-NEXT:    v_rcp_iflag_f32_e32 v5, v5
168; GISEL-NEXT:    v_rcp_iflag_f32_e32 v8, v8
169; GISEL-NEXT:    v_mul_f32_e32 v5, 0x4f7ffffe, v5
170; GISEL-NEXT:    v_mul_f32_e32 v8, 0x4f7ffffe, v8
171; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
172; GISEL-NEXT:    v_cvt_u32_f32_e32 v8, v8
173; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v5
174; GISEL-NEXT:    v_mul_lo_u32 v9, v9, v8
175; GISEL-NEXT:    v_mul_hi_u32 v7, v5, v7
176; GISEL-NEXT:    v_mul_hi_u32 v9, v8, v9
177; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
178; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v8, v9
179; GISEL-NEXT:    v_mul_hi_u32 v5, v0, v5
180; GISEL-NEXT:    v_mul_hi_u32 v7, v1, v7
181; GISEL-NEXT:    v_mul_lo_u32 v5, v5, v2
182; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v3
183; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
184; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v7
185; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, v0, v2
186; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, v1, v3
187; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
188; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
189; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
190; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
191; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, v0, v2
192; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, v1, v3
193; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
194; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
195; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
196; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
197; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
198; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v6
199; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
200; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v6
201; GISEL-NEXT:    s_setpc_b64 s[30:31]
202;
203; CGP-LABEL: v_srem_v2i32:
204; CGP:       ; %bb.0:
205; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
206; CGP-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
207; CGP-NEXT:    v_ashrrev_i32_e32 v5, 31, v2
208; CGP-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
209; CGP-NEXT:    v_ashrrev_i32_e32 v7, 31, v3
210; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
211; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
212; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v6
213; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
214; CGP-NEXT:    v_xor_b32_e32 v0, v0, v4
215; CGP-NEXT:    v_xor_b32_e32 v2, v2, v5
216; CGP-NEXT:    v_xor_b32_e32 v1, v1, v6
217; CGP-NEXT:    v_xor_b32_e32 v3, v3, v7
218; CGP-NEXT:    v_cvt_f32_u32_e32 v5, v2
219; CGP-NEXT:    v_sub_i32_e32 v7, vcc, 0, v2
220; CGP-NEXT:    v_cvt_f32_u32_e32 v8, v3
221; CGP-NEXT:    v_sub_i32_e32 v9, vcc, 0, v3
222; CGP-NEXT:    v_rcp_f32_e32 v5, v5
223; CGP-NEXT:    v_rcp_f32_e32 v8, v8
224; CGP-NEXT:    v_mul_f32_e32 v5, 0x4f7ffffe, v5
225; CGP-NEXT:    v_mul_f32_e32 v8, 0x4f7ffffe, v8
226; CGP-NEXT:    v_cvt_u32_f32_e32 v5, v5
227; CGP-NEXT:    v_cvt_u32_f32_e32 v8, v8
228; CGP-NEXT:    v_mul_lo_u32 v7, v7, v5
229; CGP-NEXT:    v_mul_lo_u32 v9, v9, v8
230; CGP-NEXT:    v_mul_lo_u32 v10, 0, v7
231; CGP-NEXT:    v_mul_hi_u32 v7, v5, v7
232; CGP-NEXT:    v_mul_lo_u32 v11, 0, v9
233; CGP-NEXT:    v_mul_hi_u32 v9, v8, v9
234; CGP-NEXT:    v_add_i32_e32 v7, vcc, v10, v7
235; CGP-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
236; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
237; CGP-NEXT:    v_add_i32_e32 v7, vcc, v8, v9
238; CGP-NEXT:    v_mul_lo_u32 v8, 0, v5
239; CGP-NEXT:    v_mul_hi_u32 v5, v0, v5
240; CGP-NEXT:    v_mul_lo_u32 v9, 0, v7
241; CGP-NEXT:    v_mul_hi_u32 v7, v1, v7
242; CGP-NEXT:    v_add_i32_e32 v5, vcc, v8, v5
243; CGP-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
244; CGP-NEXT:    v_mul_lo_u32 v5, v5, v2
245; CGP-NEXT:    v_mul_lo_u32 v7, v7, v3
246; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
247; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v7
248; CGP-NEXT:    v_sub_i32_e32 v5, vcc, v0, v2
249; CGP-NEXT:    v_sub_i32_e32 v7, vcc, v1, v3
250; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
251; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
252; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
253; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
254; CGP-NEXT:    v_sub_i32_e32 v5, vcc, v0, v2
255; CGP-NEXT:    v_sub_i32_e32 v7, vcc, v1, v3
256; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
257; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
258; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
259; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
260; CGP-NEXT:    v_xor_b32_e32 v0, v0, v4
261; CGP-NEXT:    v_xor_b32_e32 v1, v1, v6
262; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
263; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v6
264; CGP-NEXT:    s_setpc_b64 s[30:31]
265  %result = srem <2 x i32> %num, %den
266  ret <2 x i32> %result
267}
268
; Signed remainder by the constant 4096 (0x1000). Both llc configurations are
; expected to emit identical code here, so one shared check prefix is used.
; The expected code still uses the reciprocal-based sequence; the multiply by
; the divisor appears as a left shift by 12.
269define i32 @v_srem_i32_pow2k_denom(i32 %num) {
270; CHECK-LABEL: v_srem_i32_pow2k_denom:
271; CHECK:       ; %bb.0:
272; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
273; CHECK-NEXT:    s_movk_i32 s4, 0x1000
274; CHECK-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
275; CHECK-NEXT:    v_mov_b32_e32 v2, 0xfffff000
276; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
277; CHECK-NEXT:    v_cvt_f32_u32_e32 v3, s4
278; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v1
279; CHECK-NEXT:    v_rcp_iflag_f32_e32 v3, v3
280; CHECK-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
281; CHECK-NEXT:    v_cvt_u32_f32_e32 v3, v3
282; CHECK-NEXT:    v_mul_lo_u32 v2, v2, v3
283; CHECK-NEXT:    v_mul_hi_u32 v2, v3, v2
284; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
285; CHECK-NEXT:    v_mul_hi_u32 v2, v0, v2
286; CHECK-NEXT:    v_lshlrev_b32_e32 v2, 12, v2
287; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
288; CHECK-NEXT:    v_subrev_i32_e32 v2, vcc, s4, v0
289; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
290; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
291; CHECK-NEXT:    v_subrev_i32_e32 v2, vcc, s4, v0
292; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
293; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
294; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v1
295; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
296; CHECK-NEXT:    s_setpc_b64 s[30:31]
297  %result = srem i32 %num, 4096
298  ret i32 %result
299}
300
; <2 x i32> signed remainder where both divisor elements are the constant
; 4096. Unlike the scalar pow2k test, the two configurations are checked
; separately: the GISEL block multiplies back by the divisor with v_mul_lo_u32,
; while the CGP block uses a left shift by 12.
301define <2 x i32> @v_srem_v2i32_pow2k_denom(<2 x i32> %num) {
302; GISEL-LABEL: v_srem_v2i32_pow2k_denom:
303; GISEL:       ; %bb.0:
304; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
305; GISEL-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
306; GISEL-NEXT:    s_add_i32 s4, 0x1000, 0
307; GISEL-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
308; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
309; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, s4
310; GISEL-NEXT:    s_sub_i32 s5, 0, s4
311; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
312; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
313; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
314; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v3
315; GISEL-NEXT:    v_mul_f32_e32 v5, 0x4f7ffffe, v4
316; GISEL-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
317; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
318; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
319; GISEL-NEXT:    v_mul_lo_u32 v6, s5, v5
320; GISEL-NEXT:    v_mul_lo_u32 v7, s5, v4
321; GISEL-NEXT:    v_mul_hi_u32 v6, v5, v6
322; GISEL-NEXT:    v_mul_hi_u32 v7, v4, v7
323; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
324; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
325; GISEL-NEXT:    v_mul_hi_u32 v5, v0, v5
326; GISEL-NEXT:    v_mul_hi_u32 v4, v1, v4
327; GISEL-NEXT:    v_mul_lo_u32 v5, v5, s4
328; GISEL-NEXT:    v_mul_lo_u32 v4, v4, s4
329; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
330; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v4
331; GISEL-NEXT:    v_subrev_i32_e32 v4, vcc, s4, v0
332; GISEL-NEXT:    v_subrev_i32_e32 v5, vcc, s4, v1
333; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
334; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
335; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s4, v1
336; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
337; GISEL-NEXT:    v_subrev_i32_e32 v4, vcc, s4, v0
338; GISEL-NEXT:    v_subrev_i32_e32 v5, vcc, s4, v1
339; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
340; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
341; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s4, v1
342; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
343; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
344; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v3
345; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
346; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v3
347; GISEL-NEXT:    s_setpc_b64 s[30:31]
348;
349; CGP-LABEL: v_srem_v2i32_pow2k_denom:
350; CGP:       ; %bb.0:
351; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
352; CGP-NEXT:    s_movk_i32 s4, 0x1000
353; CGP-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
354; CGP-NEXT:    v_mov_b32_e32 v3, 0x1000
355; CGP-NEXT:    s_movk_i32 s5, 0xf000
356; CGP-NEXT:    v_mov_b32_e32 v4, 0xfffff000
357; CGP-NEXT:    v_ashrrev_i32_e32 v5, 31, v1
358; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
359; CGP-NEXT:    v_cvt_f32_u32_e32 v6, s4
360; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v5
361; CGP-NEXT:    v_cvt_f32_u32_e32 v7, v3
362; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
363; CGP-NEXT:    v_rcp_iflag_f32_e32 v6, v6
364; CGP-NEXT:    v_xor_b32_e32 v1, v1, v5
365; CGP-NEXT:    v_rcp_iflag_f32_e32 v7, v7
366; CGP-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
367; CGP-NEXT:    v_mul_f32_e32 v7, 0x4f7ffffe, v7
368; CGP-NEXT:    v_cvt_u32_f32_e32 v6, v6
369; CGP-NEXT:    v_cvt_u32_f32_e32 v7, v7
370; CGP-NEXT:    v_mul_lo_u32 v8, s5, v6
371; CGP-NEXT:    v_mul_lo_u32 v4, v4, v7
372; CGP-NEXT:    v_mul_hi_u32 v8, v6, v8
373; CGP-NEXT:    v_mul_hi_u32 v4, v7, v4
374; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
375; CGP-NEXT:    v_add_i32_e32 v4, vcc, v7, v4
376; CGP-NEXT:    v_mul_hi_u32 v6, v0, v6
377; CGP-NEXT:    v_mul_hi_u32 v4, v1, v4
378; CGP-NEXT:    v_lshlrev_b32_e32 v6, 12, v6
379; CGP-NEXT:    v_lshlrev_b32_e32 v4, 12, v4
380; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
381; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v4
382; CGP-NEXT:    v_subrev_i32_e32 v4, vcc, s4, v0
383; CGP-NEXT:    v_sub_i32_e32 v6, vcc, v1, v3
384; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
385; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
386; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
387; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
388; CGP-NEXT:    v_subrev_i32_e32 v4, vcc, s4, v0
389; CGP-NEXT:    v_sub_i32_e32 v6, vcc, v1, v3
390; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
391; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
392; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
393; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
394; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
395; CGP-NEXT:    v_xor_b32_e32 v1, v1, v5
396; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
397; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v5
398; CGP-NEXT:    s_setpc_b64 s[30:31]
399  %result = srem <2 x i32> %num, <i32 4096, i32 4096>
400  ret <2 x i32> %result
401}
402
; Signed remainder by the odd constant 1235195 (0x12d8fb in the expected
; code; 0xffed2705 is its 32-bit two's-complement negation). Both llc
; configurations are expected to emit identical code, so one shared check
; prefix is used.
403define i32 @v_srem_i32_oddk_denom(i32 %num) {
404; CHECK-LABEL: v_srem_i32_oddk_denom:
405; CHECK:       ; %bb.0:
406; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
407; CHECK-NEXT:    s_mov_b32 s4, 0x12d8fb
408; CHECK-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
409; CHECK-NEXT:    v_mov_b32_e32 v2, 0xffed2705
410; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
411; CHECK-NEXT:    v_cvt_f32_u32_e32 v3, s4
412; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v1
413; CHECK-NEXT:    v_rcp_iflag_f32_e32 v3, v3
414; CHECK-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
415; CHECK-NEXT:    v_cvt_u32_f32_e32 v3, v3
416; CHECK-NEXT:    v_mul_lo_u32 v2, v2, v3
417; CHECK-NEXT:    v_mul_hi_u32 v2, v3, v2
418; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
419; CHECK-NEXT:    v_mul_hi_u32 v2, v0, v2
420; CHECK-NEXT:    v_mul_lo_u32 v2, v2, s4
421; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
422; CHECK-NEXT:    v_subrev_i32_e32 v2, vcc, s4, v0
423; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
424; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
425; CHECK-NEXT:    v_subrev_i32_e32 v2, vcc, s4, v0
426; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
427; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
428; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v1
429; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
430; CHECK-NEXT:    s_setpc_b64 s[30:31]
431  %result = srem i32 %num, 1235195
432  ret i32 %result
433}
434
; <2 x i32> signed remainder where both divisor elements are the odd constant
; 1235195 (0x12d8fb in the expected code). The two configurations are checked
; separately: the GISEL block computes a single reciprocal that feeds both
; elements, while the CGP block computes one per element.
435define <2 x i32> @v_srem_v2i32_oddk_denom(<2 x i32> %num) {
436; GISEL-LABEL: v_srem_v2i32_oddk_denom:
437; GISEL:       ; %bb.0:
438; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
439; GISEL-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
440; GISEL-NEXT:    s_add_i32 s4, 0x12d8fb, 0
441; GISEL-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
442; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
443; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, s4
444; GISEL-NEXT:    s_sub_i32 s5, 0, s4
445; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
446; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
447; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
448; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v3
449; GISEL-NEXT:    v_mul_f32_e32 v5, 0x4f7ffffe, v4
450; GISEL-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
451; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
452; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
453; GISEL-NEXT:    v_mul_lo_u32 v6, s5, v5
454; GISEL-NEXT:    v_mul_lo_u32 v7, s5, v4
455; GISEL-NEXT:    v_mul_hi_u32 v6, v5, v6
456; GISEL-NEXT:    v_mul_hi_u32 v7, v4, v7
457; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
458; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
459; GISEL-NEXT:    v_mul_hi_u32 v5, v0, v5
460; GISEL-NEXT:    v_mul_hi_u32 v4, v1, v4
461; GISEL-NEXT:    v_mul_lo_u32 v5, v5, s4
462; GISEL-NEXT:    v_mul_lo_u32 v4, v4, s4
463; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
464; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v4
465; GISEL-NEXT:    v_subrev_i32_e32 v4, vcc, s4, v0
466; GISEL-NEXT:    v_subrev_i32_e32 v5, vcc, s4, v1
467; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
468; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
469; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s4, v1
470; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
471; GISEL-NEXT:    v_subrev_i32_e32 v4, vcc, s4, v0
472; GISEL-NEXT:    v_subrev_i32_e32 v5, vcc, s4, v1
473; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
474; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
475; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s4, v1
476; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
477; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
478; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v3
479; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
480; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v3
481; GISEL-NEXT:    s_setpc_b64 s[30:31]
482;
483; CGP-LABEL: v_srem_v2i32_oddk_denom:
484; CGP:       ; %bb.0:
485; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
486; CGP-NEXT:    s_mov_b32 s4, 0x12d8fb
487; CGP-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
488; CGP-NEXT:    v_mov_b32_e32 v3, 0x12d8fb
489; CGP-NEXT:    s_mov_b32 s5, 0xffed2705
490; CGP-NEXT:    v_mov_b32_e32 v4, 0xffed2705
491; CGP-NEXT:    v_ashrrev_i32_e32 v5, 31, v1
492; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
493; CGP-NEXT:    v_cvt_f32_u32_e32 v6, s4
494; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v5
495; CGP-NEXT:    v_cvt_f32_u32_e32 v7, v3
496; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
497; CGP-NEXT:    v_rcp_iflag_f32_e32 v6, v6
498; CGP-NEXT:    v_xor_b32_e32 v1, v1, v5
499; CGP-NEXT:    v_rcp_iflag_f32_e32 v7, v7
500; CGP-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
501; CGP-NEXT:    v_mul_f32_e32 v7, 0x4f7ffffe, v7
502; CGP-NEXT:    v_cvt_u32_f32_e32 v6, v6
503; CGP-NEXT:    v_cvt_u32_f32_e32 v7, v7
504; CGP-NEXT:    v_mul_lo_u32 v8, s5, v6
505; CGP-NEXT:    v_mul_lo_u32 v4, v4, v7
506; CGP-NEXT:    v_mul_hi_u32 v8, v6, v8
507; CGP-NEXT:    v_mul_hi_u32 v4, v7, v4
508; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
509; CGP-NEXT:    v_add_i32_e32 v4, vcc, v7, v4
510; CGP-NEXT:    v_mul_hi_u32 v6, v0, v6
511; CGP-NEXT:    v_mul_hi_u32 v4, v1, v4
512; CGP-NEXT:    v_mul_lo_u32 v6, v6, s4
513; CGP-NEXT:    v_mul_lo_u32 v4, v4, v3
514; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
515; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v4
516; CGP-NEXT:    v_subrev_i32_e32 v4, vcc, s4, v0
517; CGP-NEXT:    v_sub_i32_e32 v6, vcc, v1, v3
518; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
519; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
520; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
521; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
522; CGP-NEXT:    v_subrev_i32_e32 v4, vcc, s4, v0
523; CGP-NEXT:    v_sub_i32_e32 v6, vcc, v1, v3
524; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
525; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
526; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
527; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
528; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
529; CGP-NEXT:    v_xor_b32_e32 v1, v1, v5
530; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
531; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v5
532; CGP-NEXT:    s_setpc_b64 s[30:31]
533  %result = srem <2 x i32> %num, <i32 1235195, i32 1235195>
534  ret <2 x i32> %result
535}
536
; Signed remainder where the divisor is 4096 shifted left by the variable %y,
; so the divisor is not a compile-time constant. Both llc configurations are
; expected to emit identical code, so one shared check prefix is used.
537define i32 @v_srem_i32_pow2_shl_denom(i32 %x, i32 %y) {
538; CHECK-LABEL: v_srem_i32_pow2_shl_denom:
539; CHECK:       ; %bb.0:
540; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
541; CHECK-NEXT:    v_lshl_b32_e32 v1, 0x1000, v1
542; CHECK-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
543; CHECK-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
544; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
545; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
546; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v2
547; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v3
548; CHECK-NEXT:    v_cvt_f32_u32_e32 v3, v1
549; CHECK-NEXT:    v_sub_i32_e32 v4, vcc, 0, v1
550; CHECK-NEXT:    v_rcp_iflag_f32_e32 v3, v3
551; CHECK-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
552; CHECK-NEXT:    v_cvt_u32_f32_e32 v3, v3
553; CHECK-NEXT:    v_mul_lo_u32 v4, v4, v3
554; CHECK-NEXT:    v_mul_hi_u32 v4, v3, v4
555; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
556; CHECK-NEXT:    v_mul_hi_u32 v3, v0, v3
557; CHECK-NEXT:    v_mul_lo_u32 v3, v3, v1
558; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
559; CHECK-NEXT:    v_sub_i32_e32 v3, vcc, v0, v1
560; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
561; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
562; CHECK-NEXT:    v_sub_i32_e32 v3, vcc, v0, v1
563; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
564; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
565; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v2
566; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
567; CHECK-NEXT:    s_setpc_b64 s[30:31]
568  %shl.y = shl i32 4096, %y
569  %r = srem i32 %x, %shl.y
570  ret i32 %r
571}
572
; <2 x i32> form of the variable-divisor test: each divisor element is 4096
; shifted left by the matching element of %y. The two configurations are
; checked separately; in the expected code the shift amounts arrive in v2/v3
; and are overwritten with the shifted divisors.
573define <2 x i32> @v_srem_v2i32_pow2_shl_denom(<2 x i32> %x, <2 x i32> %y) {
574; GISEL-LABEL: v_srem_v2i32_pow2_shl_denom:
575; GISEL:       ; %bb.0:
576; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
577; GISEL-NEXT:    s_movk_i32 s4, 0x1000
578; GISEL-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
579; GISEL-NEXT:    v_ashrrev_i32_e32 v5, 31, v1
580; GISEL-NEXT:    v_lshl_b32_e32 v2, s4, v2
581; GISEL-NEXT:    v_lshl_b32_e32 v3, s4, v3
582; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
583; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v5
584; GISEL-NEXT:    v_ashrrev_i32_e32 v6, 31, v2
585; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
586; GISEL-NEXT:    v_ashrrev_i32_e32 v7, 31, v3
587; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v5
588; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
589; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
590; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v6
591; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v7
592; GISEL-NEXT:    v_cvt_f32_u32_e32 v6, v2
593; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, 0, v2
594; GISEL-NEXT:    v_cvt_f32_u32_e32 v8, v3
595; GISEL-NEXT:    v_sub_i32_e32 v9, vcc, 0, v3
596; GISEL-NEXT:    v_rcp_iflag_f32_e32 v6, v6
597; GISEL-NEXT:    v_rcp_iflag_f32_e32 v8, v8
598; GISEL-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
599; GISEL-NEXT:    v_mul_f32_e32 v8, 0x4f7ffffe, v8
600; GISEL-NEXT:    v_cvt_u32_f32_e32 v6, v6
601; GISEL-NEXT:    v_cvt_u32_f32_e32 v8, v8
602; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v6
603; GISEL-NEXT:    v_mul_lo_u32 v9, v9, v8
604; GISEL-NEXT:    v_mul_hi_u32 v7, v6, v7
605; GISEL-NEXT:    v_mul_hi_u32 v9, v8, v9
606; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
607; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v8, v9
608; GISEL-NEXT:    v_mul_hi_u32 v6, v0, v6
609; GISEL-NEXT:    v_mul_hi_u32 v7, v1, v7
610; GISEL-NEXT:    v_mul_lo_u32 v6, v6, v2
611; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v3
612; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
613; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v7
614; GISEL-NEXT:    v_sub_i32_e32 v6, vcc, v0, v2
615; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, v1, v3
616; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
617; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
618; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
619; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
620; GISEL-NEXT:    v_sub_i32_e32 v6, vcc, v0, v2
621; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, v1, v3
622; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
623; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
624; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
625; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
626; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
627; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v5
628; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
629; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v5
630; GISEL-NEXT:    s_setpc_b64 s[30:31]
631;
632; CGP-LABEL: v_srem_v2i32_pow2_shl_denom:
633; CGP:       ; %bb.0:
634; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
635; CGP-NEXT:    s_movk_i32 s4, 0x1000
636; CGP-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
637; CGP-NEXT:    v_ashrrev_i32_e32 v5, 31, v1
638; CGP-NEXT:    v_lshl_b32_e32 v2, s4, v2
639; CGP-NEXT:    v_lshl_b32_e32 v3, s4, v3
640; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
641; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v5
642; CGP-NEXT:    v_ashrrev_i32_e32 v6, 31, v2
643; CGP-NEXT:    v_xor_b32_e32 v0, v0, v4
644; CGP-NEXT:    v_ashrrev_i32_e32 v7, 31, v3
645; CGP-NEXT:    v_xor_b32_e32 v1, v1, v5
646; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
647; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
648; CGP-NEXT:    v_xor_b32_e32 v2, v2, v6
649; CGP-NEXT:    v_xor_b32_e32 v3, v3, v7
650; CGP-NEXT:    v_cvt_f32_u32_e32 v6, v2
651; CGP-NEXT:    v_sub_i32_e32 v7, vcc, 0, v2
652; CGP-NEXT:    v_cvt_f32_u32_e32 v8, v3
653; CGP-NEXT:    v_sub_i32_e32 v9, vcc, 0, v3
654; CGP-NEXT:    v_rcp_f32_e32 v6, v6
655; CGP-NEXT:    v_rcp_f32_e32 v8, v8
656; CGP-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
657; CGP-NEXT:    v_mul_f32_e32 v8, 0x4f7ffffe, v8
658; CGP-NEXT:    v_cvt_u32_f32_e32 v6, v6
659; CGP-NEXT:    v_cvt_u32_f32_e32 v8, v8
660; CGP-NEXT:    v_mul_lo_u32 v7, v7, v6
661; CGP-NEXT:    v_mul_lo_u32 v9, v9, v8
662; CGP-NEXT:    v_mul_lo_u32 v10, 0, v7
663; CGP-NEXT:    v_mul_hi_u32 v7, v6, v7
664; CGP-NEXT:    v_mul_lo_u32 v11, 0, v9
665; CGP-NEXT:    v_mul_hi_u32 v9, v8, v9
666; CGP-NEXT:    v_add_i32_e32 v7, vcc, v10, v7
667; CGP-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
668; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
669; CGP-NEXT:    v_add_i32_e32 v7, vcc, v8, v9
670; CGP-NEXT:    v_mul_lo_u32 v8, 0, v6
671; CGP-NEXT:    v_mul_hi_u32 v6, v0, v6
672; CGP-NEXT:    v_mul_lo_u32 v9, 0, v7
673; CGP-NEXT:    v_mul_hi_u32 v7, v1, v7
674; CGP-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
675; CGP-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
676; CGP-NEXT:    v_mul_lo_u32 v6, v6, v2
677; CGP-NEXT:    v_mul_lo_u32 v7, v7, v3
678; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
679; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v7
680; CGP-NEXT:    v_sub_i32_e32 v6, vcc, v0, v2
681; CGP-NEXT:    v_sub_i32_e32 v7, vcc, v1, v3
682; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
683; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
684; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
685; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
686; CGP-NEXT:    v_sub_i32_e32 v6, vcc, v0, v2
687; CGP-NEXT:    v_sub_i32_e32 v7, vcc, v1, v3
688; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
689; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
690; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
691; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
692; CGP-NEXT:    v_xor_b32_e32 v0, v0, v4
693; CGP-NEXT:    v_xor_b32_e32 v1, v1, v5
694; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
695; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v5
696; CGP-NEXT:    s_setpc_b64 s[30:31]
697  %shl.y = shl <2 x i32> <i32 4096, i32 4096>, %y
698  %r = srem <2 x i32> %x, %shl.y
699  ret <2 x i32> %r
700}
701
; Scalar srem where both operands are first masked to their low 24 bits
; (and with 16777215 == 0xffffff), so both values are non-negative by the
; time the signed remainder is taken.
;
; The assertions below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script rather than editing them by hand.
;
; What the two runs currently show:
;  - GISEL run (idiv expansion in AMDGPUCodeGenPrepare disabled, see the RUN
;    lines at the top of the file): the full signed sequence is still emitted,
;    i.e. sign masks from v_ashrrev_i32 by 31, add/xor to form absolute
;    values, and a trailing xor/sub to reapply the sign of the numerator.
;  - CGP run (expansion performed in AMDGPUCodeGenPrepare): no sign handling
;    is emitted; only the reciprocal-based unsigned remainder followed by two
;    compare/select correction steps. This run uses v_rcp_f32 where the GISEL
;    run uses v_rcp_iflag_f32, and it still contains v_mul_lo_u32 by constant 0.
702define i32 @v_srem_i32_24bit(i32 %num, i32 %den) {
703; GISEL-LABEL: v_srem_i32_24bit:
704; GISEL:       ; %bb.0:
705; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
706; GISEL-NEXT:    s_mov_b32 s4, 0xffffff
707; GISEL-NEXT:    v_and_b32_e32 v0, s4, v0
708; GISEL-NEXT:    v_and_b32_e32 v1, s4, v1
709; GISEL-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
710; GISEL-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
711; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
712; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
713; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
714; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v3
715; GISEL-NEXT:    v_cvt_f32_u32_e32 v3, v1
716; GISEL-NEXT:    v_sub_i32_e32 v4, vcc, 0, v1
717; GISEL-NEXT:    v_rcp_iflag_f32_e32 v3, v3
718; GISEL-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
719; GISEL-NEXT:    v_cvt_u32_f32_e32 v3, v3
720; GISEL-NEXT:    v_mul_lo_u32 v4, v4, v3
721; GISEL-NEXT:    v_mul_hi_u32 v4, v3, v4
722; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
723; GISEL-NEXT:    v_mul_hi_u32 v3, v0, v3
724; GISEL-NEXT:    v_mul_lo_u32 v3, v3, v1
725; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
726; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, v0, v1
727; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
728; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
729; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, v0, v1
730; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
731; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
732; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
733; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
734; GISEL-NEXT:    s_setpc_b64 s[30:31]
735;
736; CGP-LABEL: v_srem_i32_24bit:
737; CGP:       ; %bb.0:
738; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
739; CGP-NEXT:    s_mov_b32 s4, 0xffffff
740; CGP-NEXT:    v_and_b32_e32 v0, s4, v0
741; CGP-NEXT:    v_and_b32_e32 v1, s4, v1
742; CGP-NEXT:    v_cvt_f32_u32_e32 v2, v1
743; CGP-NEXT:    v_sub_i32_e32 v3, vcc, 0, v1
744; CGP-NEXT:    v_rcp_f32_e32 v2, v2
745; CGP-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
746; CGP-NEXT:    v_cvt_u32_f32_e32 v2, v2
747; CGP-NEXT:    v_mul_lo_u32 v3, v3, v2
748; CGP-NEXT:    v_mul_lo_u32 v4, 0, v3
749; CGP-NEXT:    v_mul_hi_u32 v3, v2, v3
750; CGP-NEXT:    v_add_i32_e32 v3, vcc, v4, v3
751; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
752; CGP-NEXT:    v_mul_lo_u32 v3, 0, v2
753; CGP-NEXT:    v_mul_hi_u32 v2, v0, v2
754; CGP-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
755; CGP-NEXT:    v_mul_lo_u32 v2, v2, v1
756; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
757; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v0, v1
758; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
759; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
760; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v0, v1
761; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
762; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
763; CGP-NEXT:    s_setpc_b64 s[30:31]
  ; IR under test: clear the top 8 bits of each operand, then take the signed
  ; remainder of the masked values.
764  %num.mask = and i32 %num, 16777215
765  %den.mask = and i32 %den, 16777215
766  %result = srem i32 %num.mask, %den.mask
767  ret i32 %result
768}
769
; Two-element vector srem where every lane of both operands is first masked
; to its low 24 bits (and with 16777215 == 0xffffff), so all lanes are
; non-negative by the time the signed remainder is taken.
;
; The assertions below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script rather than editing them by hand.
;
; What the two runs currently show:
;  - GISEL run (idiv expansion in AMDGPUCodeGenPrepare disabled, see the RUN
;    lines at the top of the file): each lane still gets the full signed
;    sequence, i.e. v_ashrrev_i32 by 31 on all four inputs, add/xor to form
;    absolute values, and a trailing xor/sub per lane using the numerator's
;    sign mask (v4 for lane 0, v6 for lane 1).
;  - CGP run (expansion performed in AMDGPUCodeGenPrepare): no sign handling
;    is emitted; each lane is the reciprocal-based unsigned remainder followed
;    by two compare/select correction steps. This run uses v_rcp_f32 where the
;    GISEL run uses v_rcp_iflag_f32, and it still contains v_mul_lo_u32 by
;    constant 0.
770define <2 x i32> @v_srem_v2i32_24bit(<2 x i32> %num, <2 x i32> %den) {
771; GISEL-LABEL: v_srem_v2i32_24bit:
772; GISEL:       ; %bb.0:
773; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
774; GISEL-NEXT:    s_mov_b32 s4, 0xffffff
775; GISEL-NEXT:    v_and_b32_e32 v0, s4, v0
776; GISEL-NEXT:    v_and_b32_e32 v1, s4, v1
777; GISEL-NEXT:    v_and_b32_e32 v2, s4, v2
778; GISEL-NEXT:    v_and_b32_e32 v3, s4, v3
779; GISEL-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
780; GISEL-NEXT:    v_ashrrev_i32_e32 v5, 31, v2
781; GISEL-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
782; GISEL-NEXT:    v_ashrrev_i32_e32 v7, 31, v3
783; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
784; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
785; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v6
786; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
787; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
788; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v5
789; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v6
790; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v7
791; GISEL-NEXT:    v_cvt_f32_u32_e32 v5, v2
792; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, 0, v2
793; GISEL-NEXT:    v_cvt_f32_u32_e32 v8, v3
794; GISEL-NEXT:    v_sub_i32_e32 v9, vcc, 0, v3
795; GISEL-NEXT:    v_rcp_iflag_f32_e32 v5, v5
796; GISEL-NEXT:    v_rcp_iflag_f32_e32 v8, v8
797; GISEL-NEXT:    v_mul_f32_e32 v5, 0x4f7ffffe, v5
798; GISEL-NEXT:    v_mul_f32_e32 v8, 0x4f7ffffe, v8
799; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
800; GISEL-NEXT:    v_cvt_u32_f32_e32 v8, v8
801; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v5
802; GISEL-NEXT:    v_mul_lo_u32 v9, v9, v8
803; GISEL-NEXT:    v_mul_hi_u32 v7, v5, v7
804; GISEL-NEXT:    v_mul_hi_u32 v9, v8, v9
805; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
806; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v8, v9
807; GISEL-NEXT:    v_mul_hi_u32 v5, v0, v5
808; GISEL-NEXT:    v_mul_hi_u32 v7, v1, v7
809; GISEL-NEXT:    v_mul_lo_u32 v5, v5, v2
810; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v3
811; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
812; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v7
813; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, v0, v2
814; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, v1, v3
815; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
816; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
817; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
818; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
819; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, v0, v2
820; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, v1, v3
821; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
822; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
823; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
824; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
825; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
826; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v6
827; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
828; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v6
829; GISEL-NEXT:    s_setpc_b64 s[30:31]
830;
831; CGP-LABEL: v_srem_v2i32_24bit:
832; CGP:       ; %bb.0:
833; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
834; CGP-NEXT:    s_mov_b32 s4, 0xffffff
835; CGP-NEXT:    v_and_b32_e32 v0, s4, v0
836; CGP-NEXT:    v_and_b32_e32 v1, s4, v1
837; CGP-NEXT:    v_and_b32_e32 v2, s4, v2
838; CGP-NEXT:    v_and_b32_e32 v3, s4, v3
839; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v2
840; CGP-NEXT:    v_sub_i32_e32 v5, vcc, 0, v2
841; CGP-NEXT:    v_cvt_f32_u32_e32 v6, v3
842; CGP-NEXT:    v_sub_i32_e32 v7, vcc, 0, v3
843; CGP-NEXT:    v_rcp_f32_e32 v4, v4
844; CGP-NEXT:    v_rcp_f32_e32 v6, v6
845; CGP-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
846; CGP-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
847; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
848; CGP-NEXT:    v_cvt_u32_f32_e32 v6, v6
849; CGP-NEXT:    v_mul_lo_u32 v5, v5, v4
850; CGP-NEXT:    v_mul_lo_u32 v7, v7, v6
851; CGP-NEXT:    v_mul_lo_u32 v8, 0, v5
852; CGP-NEXT:    v_mul_hi_u32 v5, v4, v5
853; CGP-NEXT:    v_mul_lo_u32 v9, 0, v7
854; CGP-NEXT:    v_mul_hi_u32 v7, v6, v7
855; CGP-NEXT:    v_add_i32_e32 v5, vcc, v8, v5
856; CGP-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
857; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
858; CGP-NEXT:    v_add_i32_e32 v5, vcc, v6, v7
859; CGP-NEXT:    v_mul_lo_u32 v6, 0, v4
860; CGP-NEXT:    v_mul_hi_u32 v4, v0, v4
861; CGP-NEXT:    v_mul_lo_u32 v7, 0, v5
862; CGP-NEXT:    v_mul_hi_u32 v5, v1, v5
863; CGP-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
864; CGP-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
865; CGP-NEXT:    v_mul_lo_u32 v4, v4, v2
866; CGP-NEXT:    v_mul_lo_u32 v5, v5, v3
867; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
868; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v5
869; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v0, v2
870; CGP-NEXT:    v_sub_i32_e32 v5, vcc, v1, v3
871; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
872; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
873; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
874; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
875; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v0, v2
876; CGP-NEXT:    v_sub_i32_e32 v5, vcc, v1, v3
877; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
878; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
879; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
880; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
881; CGP-NEXT:    s_setpc_b64 s[30:31]
  ; IR under test: clear the top 8 bits of every lane of each operand, then
  ; take the lane-wise signed remainder of the masked vectors.
882  %num.mask = and <2 x i32> %num, <i32 16777215, i32 16777215>
883  %den.mask = and <2 x i32> %den, <i32 16777215, i32 16777215>
884  %result = srem <2 x i32> %num.mask, %den.mask
885  ret <2 x i32> %result
886}
887