; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
; RUN:   | FileCheck -check-prefixes=CHECK,RV32I %s
; RUN: llc -mtriple=riscv32 -mattr=+m -verify-machineinstrs < %s \
; RUN:   | FileCheck -check-prefixes=CHECK,RV32IM %s
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
; RUN:   | FileCheck -check-prefixes=CHECK,RV64I %s
; RUN: llc -mtriple=riscv64 -mattr=+m -verify-machineinstrs < %s \
; RUN:   | FileCheck -check-prefixes=CHECK,RV64IM %s

; urem by the odd constant 95: with the M extension this should be lowered to
; a magic-number mulhu sequence; without it, to a __umodsi3/__umoddi3 libcall.
define i32 @fold_urem_positive_odd(i32 %x) nounwind {
; RV32I-LABEL: fold_urem_positive_odd:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    sw ra, 12(sp)
; RV32I-NEXT:    addi a1, zero, 95
; RV32I-NEXT:    call __umodsi3
; RV32I-NEXT:    lw ra, 12(sp)
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    ret
;
; RV32IM-LABEL: fold_urem_positive_odd:
; RV32IM:       # %bb.0:
; RV32IM-NEXT:    lui a1, 364242
; RV32IM-NEXT:    addi a1, a1, 777
; RV32IM-NEXT:    mulhu a1, a0, a1
; RV32IM-NEXT:    sub a2, a0, a1
; RV32IM-NEXT:    srli a2, a2, 1
; RV32IM-NEXT:    add a1, a2, a1
; RV32IM-NEXT:    srli a1, a1, 6
; RV32IM-NEXT:    addi a2, zero, 95
; RV32IM-NEXT:    mul a1, a1, a2
; RV32IM-NEXT:    sub a0, a0, a1
; RV32IM-NEXT:    ret
;
; RV64I-LABEL: fold_urem_positive_odd:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    sd ra, 8(sp)
; RV64I-NEXT:    slli a0, a0, 32
; RV64I-NEXT:    srli a0, a0, 32
; RV64I-NEXT:    addi a1, zero, 95
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    ld ra, 8(sp)
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    ret
;
; RV64IM-LABEL: fold_urem_positive_odd:
; RV64IM:       # %bb.0:
; RV64IM-NEXT:    slli a0, a0, 32
; RV64IM-NEXT:    srli a0, a0, 32
; RV64IM-NEXT:    lui a1, 1423
; RV64IM-NEXT:    addiw a1, a1, -733
; RV64IM-NEXT:    slli a1, a1, 15
; RV64IM-NEXT:    addi a1, a1, 1035
; RV64IM-NEXT:    slli a1, a1, 13
; RV64IM-NEXT:    addi a1, a1, -1811
; RV64IM-NEXT:    slli a1, a1, 12
; RV64IM-NEXT:    addi a1, a1, 561
; RV64IM-NEXT:    mulhu a1, a0, a1
; RV64IM-NEXT:    sub a2, a0, a1
; RV64IM-NEXT:    srli a2, a2, 1
; RV64IM-NEXT:    add a1, a2, a1
; RV64IM-NEXT:    srli a1, a1, 6
; RV64IM-NEXT:    addi a2, zero, 95
; RV64IM-NEXT:    mul a1, a1, a2
; RV64IM-NEXT:    sub a0, a0, a1
; RV64IM-NEXT:    ret
  %1 = urem i32 %x, 95
  ret i32 %1
}


; urem by the even constant 1060: with the M extension this should still be
; lowered to a magic-number mulhu sequence; without it, to a libcall.
define i32 @fold_urem_positive_even(i32 %x) nounwind {
; RV32I-LABEL: fold_urem_positive_even:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    sw ra, 12(sp)
; RV32I-NEXT:    addi a1, zero, 1060
; RV32I-NEXT:    call __umodsi3
; RV32I-NEXT:    lw ra, 12(sp)
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    ret
;
; RV32IM-LABEL: fold_urem_positive_even:
; RV32IM:       # %bb.0:
; RV32IM-NEXT:    lui a1, 1012964
; RV32IM-NEXT:    addi a1, a1, -61
; RV32IM-NEXT:    mulhu a1, a0, a1
; RV32IM-NEXT:    srli a1, a1, 10
; RV32IM-NEXT:    addi a2, zero, 1060
; RV32IM-NEXT:    mul a1, a1, a2
; RV32IM-NEXT:    sub a0, a0, a1
; RV32IM-NEXT:    ret
;
; RV64I-LABEL: fold_urem_positive_even:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    sd ra, 8(sp)
; RV64I-NEXT:    slli a0, a0, 32
; RV64I-NEXT:    srli a0, a0, 32
; RV64I-NEXT:    addi a1, zero, 1060
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    ld ra, 8(sp)
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    ret
;
; RV64IM-LABEL: fold_urem_positive_even:
; RV64IM:       # %bb.0:
; RV64IM-NEXT:    slli a0, a0, 32
; RV64IM-NEXT:    srli a0, a0, 32
; RV64IM-NEXT:    lui a1, 1048020
; RV64IM-NEXT:    addiw a1, a1, -1793
; RV64IM-NEXT:    slli a1, a1, 12
; RV64IM-NEXT:    addi a1, a1, 139
; RV64IM-NEXT:    slli a1, a1, 14
; RV64IM-NEXT:    addi a1, a1, 1793
; RV64IM-NEXT:    slli a1, a1, 12
; RV64IM-NEXT:    addi a1, a1, -139
; RV64IM-NEXT:    mulhu a1, a0, a1
; RV64IM-NEXT:    srli a1, a1, 10
; RV64IM-NEXT:    addi a2, zero, 1060
; RV64IM-NEXT:    mul a1, a1, a2
; RV64IM-NEXT:    sub a0, a0, a1
; RV64IM-NEXT:    ret
  %1 = urem i32 %x, 1060
  ret i32 %1
}


; Don't fold if we can combine urem with udiv.
define i32 @combine_urem_udiv(i32 %x) nounwind {
; RV32I-LABEL: combine_urem_udiv:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    sw ra, 12(sp)
; RV32I-NEXT:    sw s0, 8(sp)
; RV32I-NEXT:    sw s1, 4(sp)
; RV32I-NEXT:    mv s0, a0
; RV32I-NEXT:    addi a1, zero, 95
; RV32I-NEXT:    call __umodsi3
; RV32I-NEXT:    mv s1, a0
; RV32I-NEXT:    addi a1, zero, 95
; RV32I-NEXT:    mv a0, s0
; RV32I-NEXT:    call __udivsi3
; RV32I-NEXT:    add a0, s1, a0
; RV32I-NEXT:    lw s1, 4(sp)
; RV32I-NEXT:    lw s0, 8(sp)
; RV32I-NEXT:    lw ra, 12(sp)
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    ret
;
; RV32IM-LABEL: combine_urem_udiv:
; RV32IM:       # %bb.0:
; RV32IM-NEXT:    lui a1, 364242
; RV32IM-NEXT:    addi a1, a1, 777
; RV32IM-NEXT:    mulhu a1, a0, a1
; RV32IM-NEXT:    sub a2, a0, a1
; RV32IM-NEXT:    srli a2, a2, 1
; RV32IM-NEXT:    add a1, a2, a1
; RV32IM-NEXT:    srli a1, a1, 6
; RV32IM-NEXT:    addi a2, zero, 95
; RV32IM-NEXT:    mul a2, a1, a2
; RV32IM-NEXT:    sub a0, a0, a2
; RV32IM-NEXT:    add a0, a0, a1
; RV32IM-NEXT:    ret
;
; RV64I-LABEL: combine_urem_udiv:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -32
; RV64I-NEXT:    sd ra, 24(sp)
; RV64I-NEXT:    sd s0, 16(sp)
; RV64I-NEXT:    sd s1, 8(sp)
; RV64I-NEXT:    slli a0, a0, 32
; RV64I-NEXT:    srli s0, a0, 32
; RV64I-NEXT:    addi a1, zero, 95
; RV64I-NEXT:    mv a0, s0
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    mv s1, a0
; RV64I-NEXT:    addi a1, zero, 95
; RV64I-NEXT:    mv a0, s0
; RV64I-NEXT:    call __udivdi3
; RV64I-NEXT:    add a0, s1, a0
; RV64I-NEXT:    ld s1, 8(sp)
; RV64I-NEXT:    ld s0, 16(sp)
; RV64I-NEXT:    ld ra, 24(sp)
; RV64I-NEXT:    addi sp, sp, 32
; RV64I-NEXT:    ret
;
; RV64IM-LABEL: combine_urem_udiv:
; RV64IM:       # %bb.0:
; RV64IM-NEXT:    slli a0, a0, 32
; RV64IM-NEXT:    srli a0, a0, 32
; RV64IM-NEXT:    lui a1, 1423
; RV64IM-NEXT:    addiw a1, a1, -733
; RV64IM-NEXT:    slli a1, a1, 15
; RV64IM-NEXT:    addi a1, a1, 1035
; RV64IM-NEXT:    slli a1, a1, 13
; RV64IM-NEXT:    addi a1, a1, -1811
; RV64IM-NEXT:    slli a1, a1, 12
; RV64IM-NEXT:    addi a1, a1, 561
; RV64IM-NEXT:    mulhu a1, a0, a1
; RV64IM-NEXT:    sub a2, a0, a1
; RV64IM-NEXT:    srli a2, a2, 1
; RV64IM-NEXT:    add a1, a2, a1
; RV64IM-NEXT:    srli a1, a1, 6
; RV64IM-NEXT:    addi a2, zero, 95
; RV64IM-NEXT:    mul a2, a1, a2
; RV64IM-NEXT:    sub a0, a0, a2
; RV64IM-NEXT:    add a0, a0, a1
; RV64IM-NEXT:    ret
  %1 = urem i32 %x, 95
  %2 = udiv i32 %x, 95
  %3 = add i32 %1, %2
  ret i32 %3
}
; Don't fold for divisors that are a power of two.
define i32 @dont_fold_urem_power_of_two(i32 %x) nounwind {
; CHECK-LABEL: dont_fold_urem_power_of_two:
; CHECK:       # %bb.0:
; CHECK-NEXT:    andi a0, a0, 63
; CHECK-NEXT:    ret
  %1 = urem i32 %x, 64
  ret i32 %1
}
; Don't fold if the divisor is one.
define i32 @dont_fold_urem_one(i32 %x) nounwind {
; CHECK-LABEL: dont_fold_urem_one:
; CHECK:       # %bb.0:
; CHECK-NEXT:    mv a0, zero
; CHECK-NEXT:    ret
  %1 = urem i32 %x, 1
  ret i32 %1
}
; Don't fold if the divisor is 2^32 (the i32 constant 4294967296 wraps to 0,
; so the result is simply the input and no code should be emitted).
define i32 @dont_fold_urem_i32_umax(i32 %x) nounwind {
; CHECK-LABEL: dont_fold_urem_i32_umax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret
  %1 = urem i32 %x, 4294967296
  ret i32 %1
}
; Don't fold i64 urem
define i64 @dont_fold_urem_i64(i64 %x) nounwind {
; RV32I-LABEL: dont_fold_urem_i64:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    sw ra, 12(sp)
; RV32I-NEXT:    addi a2, zero, 98
; RV32I-NEXT:    mv a3, zero
; RV32I-NEXT:    call __umoddi3
; RV32I-NEXT:    lw ra, 12(sp)
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    ret
;
; RV32IM-LABEL: dont_fold_urem_i64:
; RV32IM:       # %bb.0:
; RV32IM-NEXT:    addi sp, sp, -16
; RV32IM-NEXT:    sw ra, 12(sp)
; RV32IM-NEXT:    addi a2, zero, 98
; RV32IM-NEXT:    mv a3, zero
; RV32IM-NEXT:    call __umoddi3
; RV32IM-NEXT:    lw ra, 12(sp)
; RV32IM-NEXT:    addi sp, sp, 16
; RV32IM-NEXT:    ret
;
; RV64I-LABEL: dont_fold_urem_i64:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    sd ra, 8(sp)
; RV64I-NEXT:    addi a1, zero, 98
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    ld ra, 8(sp)
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    ret
;
; RV64IM-LABEL: dont_fold_urem_i64:
; RV64IM:       # %bb.0:
; RV64IM-NEXT:    srli a1, a0, 1
; RV64IM-NEXT:    lui a2, 2675
; RV64IM-NEXT:    addiw a2, a2, -251
; RV64IM-NEXT:    slli a2, a2, 13
; RV64IM-NEXT:    addi a2, a2, 1839
; RV64IM-NEXT:    slli a2, a2, 13
; RV64IM-NEXT:    addi a2, a2, 167
; RV64IM-NEXT:    slli a2, a2, 13
; RV64IM-NEXT:    addi a2, a2, 1505
; RV64IM-NEXT:    mulhu a1, a1, a2
; RV64IM-NEXT:    srli a1, a1, 4
; RV64IM-NEXT:    addi a2, zero, 98
; RV64IM-NEXT:    mul a1, a1, a2
; RV64IM-NEXT:    sub a0, a0, a1
; RV64IM-NEXT:    ret
  %1 = urem i64 %x, 98
  ret i64 %1
}