; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
; RUN:   | FileCheck -check-prefixes=CHECK,RV32I %s
; RUN: llc -mtriple=riscv32 -mattr=+m -verify-machineinstrs < %s \
; RUN:   | FileCheck -check-prefixes=CHECK,RV32IM %s
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
; RUN:   | FileCheck -check-prefixes=CHECK,RV64I %s
; RUN: llc -mtriple=riscv64 -mattr=+m -verify-machineinstrs < %s \
; RUN:   | FileCheck -check-prefixes=CHECK,RV64IM %s

define i32 @fold_urem_positive_odd(i32 %x) nounwind {
; RV32I-LABEL: fold_urem_positive_odd:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    sw ra, 12(sp)
; RV32I-NEXT:    addi a1, zero, 95
; RV32I-NEXT:    call __umodsi3
; RV32I-NEXT:    lw ra, 12(sp)
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    ret
;
; RV32IM-LABEL: fold_urem_positive_odd:
; RV32IM:       # %bb.0:
; RV32IM-NEXT:    lui a1, 364242
; RV32IM-NEXT:    addi a1, a1, 777
; RV32IM-NEXT:    mulhu a1, a0, a1
; RV32IM-NEXT:    sub a2, a0, a1
; RV32IM-NEXT:    srli a2, a2, 1
; RV32IM-NEXT:    add a1, a2, a1
; RV32IM-NEXT:    srli a1, a1, 6
; RV32IM-NEXT:    addi a2, zero, 95
; RV32IM-NEXT:    mul a1, a1, a2
; RV32IM-NEXT:    sub a0, a0, a1
; RV32IM-NEXT:    ret
;
; RV64I-LABEL: fold_urem_positive_odd:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    sd ra, 8(sp)
; RV64I-NEXT:    slli a0, a0, 32
; RV64I-NEXT:    srli a0, a0, 32
; RV64I-NEXT:    addi a1, zero, 95
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    ld ra, 8(sp)
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    ret
;
; RV64IM-LABEL: fold_urem_positive_odd:
; RV64IM:       # %bb.0:
; RV64IM-NEXT:    slli a0, a0, 32
; RV64IM-NEXT:    srli a0, a0, 32
; RV64IM-NEXT:    lui a1, 1423
; RV64IM-NEXT:    addiw a1, a1, -733
; RV64IM-NEXT:    slli a1, a1, 15
; RV64IM-NEXT:    addi a1, a1, 1035
; RV64IM-NEXT:    slli a1, a1, 13
; RV64IM-NEXT:    addi a1, a1, -1811
; RV64IM-NEXT:    slli a1, a1, 12
; RV64IM-NEXT:    addi a1, a1, 561
; RV64IM-NEXT:    mulhu a1, a0, a1
; RV64IM-NEXT:    sub a2, a0, a1
; RV64IM-NEXT:    srli a2, a2, 1
; RV64IM-NEXT:    add a1, a2, a1
; RV64IM-NEXT:    srli a1, a1, 6
; RV64IM-NEXT:    addi a2, zero, 95
; RV64IM-NEXT:    mul a1, a1, a2
; RV64IM-NEXT:    sub a0, a0, a1
; RV64IM-NEXT:    ret
  %1 = urem i32 %x, 95
  ret i32 %1
}


define i32 @fold_urem_positive_even(i32 %x) nounwind {
; RV32I-LABEL: fold_urem_positive_even:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    sw ra, 12(sp)
; RV32I-NEXT:    addi a1, zero, 1060
; RV32I-NEXT:    call __umodsi3
; RV32I-NEXT:    lw ra, 12(sp)
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    ret
;
; RV32IM-LABEL: fold_urem_positive_even:
; RV32IM:       # %bb.0:
; RV32IM-NEXT:    lui a1, 1012964
; RV32IM-NEXT:    addi a1, a1, -61
; RV32IM-NEXT:    mulhu a1, a0, a1
; RV32IM-NEXT:    srli a1, a1, 10
; RV32IM-NEXT:    addi a2, zero, 1060
; RV32IM-NEXT:    mul a1, a1, a2
; RV32IM-NEXT:    sub a0, a0, a1
; RV32IM-NEXT:    ret
;
; RV64I-LABEL: fold_urem_positive_even:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    sd ra, 8(sp)
; RV64I-NEXT:    slli a0, a0, 32
; RV64I-NEXT:    srli a0, a0, 32
; RV64I-NEXT:    addi a1, zero, 1060
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    ld ra, 8(sp)
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    ret
;
; RV64IM-LABEL: fold_urem_positive_even:
; RV64IM:       # %bb.0:
; RV64IM-NEXT:    slli a0, a0, 32
; RV64IM-NEXT:    srli a0, a0, 32
; RV64IM-NEXT:    lui a1, 1048020
; RV64IM-NEXT:    addiw a1, a1, -1793
; RV64IM-NEXT:    slli a1, a1, 12
; RV64IM-NEXT:    addi a1, a1, 139
; RV64IM-NEXT:    slli a1, a1, 14
; RV64IM-NEXT:    addi a1, a1, 1793
; RV64IM-NEXT:    slli a1, a1, 12
; RV64IM-NEXT:    addi a1, a1, -139
; RV64IM-NEXT:    mulhu a1, a0, a1
; RV64IM-NEXT:    srli a1, a1, 10
; RV64IM-NEXT:    addi a2, zero, 1060
; RV64IM-NEXT:    mul a1, a1, a2
; RV64IM-NEXT:    sub a0, a0, a1
; RV64IM-NEXT:    ret
  %1 = urem i32 %x, 1060
  ret i32 %1
}

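; Note (a hand-checked sketch of the expansion, not part of the autogenerated
; assertions): in the RV32IM/RV64IM blocks above, urem by a constant is lowered
; to a multiply by a "magic" reciprocal. For 95 the loaded constant is
; (364242 << 12) + 777 = 0x58ED2309, and the effective multiplier is
; 2^32 + 0x58ED2309 = 5786903305 = ceil(2^39 / 95). With q = mulhu(x, M),
; the ((x - q) >> 1) + q step computes (x + q) >> 1 without overflow, the
; srli by 6 yields floor(x * (2^32 + M) / 2^39) = x / 95, and the trailing
; mul/sub recover the remainder. For 1060 the constant
; (1012964 << 12) - 61 = 4149100483 = ceil(2^42 / 1060) already fits in 32
; bits, so a plain mulhu followed by srli 10 suffices.
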
; Don't fold if we can combine urem with udiv.
define i32 @combine_urem_udiv(i32 %x) nounwind {
; RV32I-LABEL: combine_urem_udiv:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    sw ra, 12(sp)
; RV32I-NEXT:    sw s0, 8(sp)
; RV32I-NEXT:    sw s1, 4(sp)
; RV32I-NEXT:    mv s0, a0
; RV32I-NEXT:    addi a1, zero, 95
; RV32I-NEXT:    call __umodsi3
; RV32I-NEXT:    mv s1, a0
; RV32I-NEXT:    addi a1, zero, 95
; RV32I-NEXT:    mv a0, s0
; RV32I-NEXT:    call __udivsi3
; RV32I-NEXT:    add a0, s1, a0
; RV32I-NEXT:    lw s1, 4(sp)
; RV32I-NEXT:    lw s0, 8(sp)
; RV32I-NEXT:    lw ra, 12(sp)
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    ret
;
; RV32IM-LABEL: combine_urem_udiv:
; RV32IM:       # %bb.0:
; RV32IM-NEXT:    lui a1, 364242
; RV32IM-NEXT:    addi a1, a1, 777
; RV32IM-NEXT:    mulhu a1, a0, a1
; RV32IM-NEXT:    sub a2, a0, a1
; RV32IM-NEXT:    srli a2, a2, 1
; RV32IM-NEXT:    add a1, a2, a1
; RV32IM-NEXT:    srli a1, a1, 6
; RV32IM-NEXT:    addi a2, zero, 95
; RV32IM-NEXT:    mul a2, a1, a2
; RV32IM-NEXT:    sub a0, a0, a2
; RV32IM-NEXT:    add a0, a0, a1
; RV32IM-NEXT:    ret
;
; RV64I-LABEL: combine_urem_udiv:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -32
; RV64I-NEXT:    sd ra, 24(sp)
; RV64I-NEXT:    sd s0, 16(sp)
; RV64I-NEXT:    sd s1, 8(sp)
; RV64I-NEXT:    slli a0, a0, 32
; RV64I-NEXT:    srli s0, a0, 32
; RV64I-NEXT:    addi a1, zero, 95
; RV64I-NEXT:    mv a0, s0
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    mv s1, a0
; RV64I-NEXT:    addi a1, zero, 95
; RV64I-NEXT:    mv a0, s0
; RV64I-NEXT:    call __udivdi3
; RV64I-NEXT:    add a0, s1, a0
; RV64I-NEXT:    ld s1, 8(sp)
; RV64I-NEXT:    ld s0, 16(sp)
; RV64I-NEXT:    ld ra, 24(sp)
; RV64I-NEXT:    addi sp, sp, 32
; RV64I-NEXT:    ret
;
; RV64IM-LABEL: combine_urem_udiv:
; RV64IM:       # %bb.0:
; RV64IM-NEXT:    slli a0, a0, 32
; RV64IM-NEXT:    srli a0, a0, 32
; RV64IM-NEXT:    lui a1, 1423
; RV64IM-NEXT:    addiw a1, a1, -733
; RV64IM-NEXT:    slli a1, a1, 15
; RV64IM-NEXT:    addi a1, a1, 1035
; RV64IM-NEXT:    slli a1, a1, 13
; RV64IM-NEXT:    addi a1, a1, -1811
; RV64IM-NEXT:    slli a1, a1, 12
; RV64IM-NEXT:    addi a1, a1, 561
; RV64IM-NEXT:    mulhu a1, a0, a1
; RV64IM-NEXT:    sub a2, a0, a1
; RV64IM-NEXT:    srli a2, a2, 1
; RV64IM-NEXT:    add a1, a2, a1
; RV64IM-NEXT:    srli a1, a1, 6
; RV64IM-NEXT:    addi a2, zero, 95
; RV64IM-NEXT:    mul a2, a1, a2
; RV64IM-NEXT:    sub a0, a0, a2
; RV64IM-NEXT:    add a0, a0, a1
; RV64IM-NEXT:    ret
  %1 = urem i32 %x, 95
  %2 = udiv i32 %x, 95
  %3 = add i32 %1, %2
  ret i32 %3
}

; Don't fold for divisors that are a power of two.
define i32 @dont_fold_urem_power_of_two(i32 %x) nounwind {
; CHECK-LABEL: dont_fold_urem_power_of_two:
; CHECK:       # %bb.0:
; CHECK-NEXT:    andi a0, a0, 63
; CHECK-NEXT:    ret
  %1 = urem i32 %x, 64
  ret i32 %1
}

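; Note (a hand-checked observation, not part of the autogenerated assertions):
; in the RV32IM/RV64IM output for combine_urem_udiv, a single magic-multiply
; expansion produces the quotient and the remainder is rebuilt from it as
; x - 95 * (x / 95), so urem and udiv of the same operands cost one division,
; not two. In dont_fold_urem_power_of_two, x % 64 keeps only the low six bits,
; hence the single andi with 63 on every configuration.
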
; Don't fold if the divisor is one.
define i32 @dont_fold_urem_one(i32 %x) nounwind {
; CHECK-LABEL: dont_fold_urem_one:
; CHECK:       # %bb.0:
; CHECK-NEXT:    mv a0, zero
; CHECK-NEXT:    ret
  %1 = urem i32 %x, 1
  ret i32 %1
}

; Don't fold if the divisor is 2^32.
define i32 @dont_fold_urem_i32_umax(i32 %x) nounwind {
; CHECK-LABEL: dont_fold_urem_i32_umax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret
  %1 = urem i32 %x, 4294967296
  ret i32 %1
}

; Don't fold i64 urem
define i64 @dont_fold_urem_i64(i64 %x) nounwind {
; RV32I-LABEL: dont_fold_urem_i64:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    sw ra, 12(sp)
; RV32I-NEXT:    addi a2, zero, 98
; RV32I-NEXT:    mv a3, zero
; RV32I-NEXT:    call __umoddi3
; RV32I-NEXT:    lw ra, 12(sp)
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    ret
;
; RV32IM-LABEL: dont_fold_urem_i64:
; RV32IM:       # %bb.0:
; RV32IM-NEXT:    addi sp, sp, -16
; RV32IM-NEXT:    sw ra, 12(sp)
; RV32IM-NEXT:    addi a2, zero, 98
; RV32IM-NEXT:    mv a3, zero
; RV32IM-NEXT:    call __umoddi3
; RV32IM-NEXT:    lw ra, 12(sp)
; RV32IM-NEXT:    addi sp, sp, 16
; RV32IM-NEXT:    ret
;
; RV64I-LABEL: dont_fold_urem_i64:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    sd ra, 8(sp)
; RV64I-NEXT:    addi a1, zero, 98
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    ld ra, 8(sp)
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    ret
;
; RV64IM-LABEL: dont_fold_urem_i64:
; RV64IM:       # %bb.0:
; RV64IM-NEXT:    srli a1, a0, 1
; RV64IM-NEXT:    lui a2, 2675
; RV64IM-NEXT:    addiw a2, a2, -251
; RV64IM-NEXT:    slli a2, a2, 13
; RV64IM-NEXT:    addi a2, a2, 1839
; RV64IM-NEXT:    slli a2, a2, 13
; RV64IM-NEXT:    addi a2, a2, 167
; RV64IM-NEXT:    slli a2, a2, 13
; RV64IM-NEXT:    addi a2, a2, 1505
; RV64IM-NEXT:    mulhu a1, a1, a2
; RV64IM-NEXT:    srli a1, a1, 4
; RV64IM-NEXT:    addi a2, zero, 98
; RV64IM-NEXT:    mul a1, a1, a2
; RV64IM-NEXT:    sub a0, a0, a1
; RV64IM-NEXT:    ret
  %1 = urem i64 %x, 98
  ret i64 %1
}

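; Note (a hand-checked sketch, not part of the autogenerated assertions): in
; dont_fold_urem_i64, 98 = 2 * 49 and floor(floor(x / 2) / 49) = floor(x / 98),
; so the RV64IM block first shifts the dividend right by one, then multiplies
; by the i64 magic constant built with the lui/addiw/slli/addi chain,
; 6023426636313322977 = ceil(2^68 / 49); mulhu followed by srli 4 gives the
; quotient and mul/sub recover the remainder. Configurations without the M
; extension, and RV32 (which has no 64-bit multiply high), fall back to the
; __umoddi3 libcall instead.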