; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdpal < %s | FileCheck -check-prefixes=CHECK,GISEL %s
; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdpal < %s | FileCheck -check-prefixes=CHECK,CGP %s

; The same 32-bit expansion is implemented in the legalizer and in AMDGPUCodeGenPrepare.
;
; Prefix key, as set up by the two llc invocations above: the GISEL checks
; belong to the run that passes -amdgpu-codegenprepare-disable-idiv-expansion=1,
; the CGP checks to the run that passes =0, and the plain CHECK prefix is
; shared by both runs.

; Signed remainder of two variable i32 operands (v0 = %num, v1 = %den in the
; checks below). The two runs differ visibly in the reciprocal instruction
; (v_rcp_iflag_f32 vs. v_rcp_f32) and in the extra "v_mul_lo_u32 vN, 0, vM"
; and add pairs that only the CGP sequence contains.
define i32 @v_srem_i32(i32 %num, i32 %den) {
; GISEL-LABEL: v_srem_i32:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
; GISEL-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v3
; GISEL-NEXT:    v_cvt_f32_u32_e32 v3, v1
; GISEL-NEXT:    v_sub_i32_e32 v4, vcc, 0, v1
; GISEL-NEXT:    v_rcp_iflag_f32_e32 v3, v3
; GISEL-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
; GISEL-NEXT:    v_cvt_u32_f32_e32 v3, v3
; GISEL-NEXT:    v_mul_lo_u32 v4, v4, v3
; GISEL-NEXT:    v_mul_hi_u32 v4, v3, v4
; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
; GISEL-NEXT:    v_mul_hi_u32 v3, v0, v3
; GISEL-NEXT:    v_mul_lo_u32 v3, v3, v1
; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, v0, v1
; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, v0, v1
; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
; GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_srem_i32:
; CGP:       ; %bb.0:
; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CGP-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
; CGP-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
; CGP-NEXT:    v_xor_b32_e32 v1, v1, v3
; CGP-NEXT:    v_cvt_f32_u32_e32 v3, v1
; CGP-NEXT:    v_sub_i32_e32 v4, vcc, 0, v1
; CGP-NEXT:    v_rcp_f32_e32 v3, v3
; CGP-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
; CGP-NEXT:    v_cvt_u32_f32_e32 v3, v3
; CGP-NEXT:    v_mul_lo_u32 v4, v4, v3
; CGP-NEXT:    v_mul_lo_u32 v5, 0, v4
; CGP-NEXT:    v_mul_hi_u32 v4, v3, v4
; CGP-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
; CGP-NEXT:    v_mul_lo_u32 v4, 0, v3
; CGP-NEXT:    v_mul_hi_u32 v3, v0, v3
; CGP-NEXT:    v_add_i32_e32 v3, vcc, v4, v3
; CGP-NEXT:    v_mul_lo_u32 v3, v3, v1
; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
; CGP-NEXT:    v_sub_i32_e32 v3, vcc, v0, v1
; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
; CGP-NEXT:    v_sub_i32_e32 v3, vcc, v0, v1
; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
; CGP-NEXT:    s_setpc_b64 s[30:31]
  %result = srem i32 %num, %den
  ret i32 %result
}

; FIXME: This is a workaround for not handling uniform VGPR case.
declare i32 @llvm.amdgcn.readfirstlane(i32)

; amdgpu_ps entry point with both operands marked inreg; the checks read them
; from s0/s1. The srem result is passed through llvm.amdgcn.readfirstlane,
; which appears as the final "v_readfirstlane_b32 s0, v0" in both sequences.
define amdgpu_ps i32 @s_srem_i32(i32 inreg %num, i32 inreg %den) {
; GISEL-LABEL: s_srem_i32:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_ashr_i32 s2, s0, 31
; GISEL-NEXT:    s_ashr_i32 s3, s1, 31
; GISEL-NEXT:    s_add_i32 s0, s0, s2
; GISEL-NEXT:    s_add_i32 s1, s1, s3
; GISEL-NEXT:    s_xor_b32 s0, s0, s2
; GISEL-NEXT:    s_xor_b32 s1, s1, s3
; GISEL-NEXT:    v_cvt_f32_u32_e32 v0, s1
; GISEL-NEXT:    s_sub_i32 s3, 0, s1
; GISEL-NEXT:    v_rcp_iflag_f32_e32 v0, v0
; GISEL-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
; GISEL-NEXT:    v_cvt_u32_f32_e32 v0, v0
; GISEL-NEXT:    v_mul_lo_u32 v1, s3, v0
; GISEL-NEXT:    v_mul_hi_u32 v1, v0, v1
; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
; GISEL-NEXT:    v_mul_hi_u32 v0, s0, v0
; GISEL-NEXT:    v_mul_lo_u32 v0, v0, s1
; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, s0, v0
; GISEL-NEXT:    v_subrev_i32_e32 v1, vcc, s1, v0
; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s1, v0
; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
; GISEL-NEXT:    v_subrev_i32_e32 v1, vcc, s1, v0
; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s1, v0
; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
; GISEL-NEXT:    v_xor_b32_e32 v0, s2, v0
; GISEL-NEXT:    v_subrev_i32_e32 v0, vcc, s2, v0
; GISEL-NEXT:    v_readfirstlane_b32 s0, v0
; GISEL-NEXT:    ; return to shader part epilog
;
; CGP-LABEL: s_srem_i32:
; CGP:       ; %bb.0:
; CGP-NEXT:    s_ashr_i32 s2, s0, 31
; CGP-NEXT:    s_ashr_i32 s3, s1, 31
; CGP-NEXT:    s_add_i32 s0, s0, s2
; CGP-NEXT:    s_add_i32 s1, s1, s3
; CGP-NEXT:    s_xor_b32 s0, s0, s2
; CGP-NEXT:    s_xor_b32 s1, s1, s3
; CGP-NEXT:    v_cvt_f32_u32_e32 v0, s1
; CGP-NEXT:    s_sub_i32 s3, 0, s1
; CGP-NEXT:    v_rcp_f32_e32 v0, v0
; CGP-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
; CGP-NEXT:    v_cvt_u32_f32_e32 v0, v0
; CGP-NEXT:    v_mul_lo_u32 v1, s3, v0
; CGP-NEXT:    v_mul_lo_u32 v2, 0, v1
; CGP-NEXT:    v_mul_hi_u32 v1, v0, v1
; CGP-NEXT:    v_add_i32_e32 v1, vcc, v2, v1
; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
; CGP-NEXT:    v_mul_lo_u32 v1, 0, v0
; CGP-NEXT:    v_mul_hi_u32 v0, s0, v0
; CGP-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
; CGP-NEXT:    v_mul_lo_u32 v0, v0, s1
; CGP-NEXT:    v_sub_i32_e32 v0, vcc, s0, v0
; CGP-NEXT:    v_subrev_i32_e32 v1, vcc, s1, v0
; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s1, v0
; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
; CGP-NEXT:    v_subrev_i32_e32 v1, vcc, s1, v0
; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s1, v0
; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
; CGP-NEXT:    v_xor_b32_e32 v0, s2, v0
; CGP-NEXT:    v_subrev_i32_e32 v0, vcc, s2, v0
; CGP-NEXT:    v_readfirstlane_b32 s0, v0
; CGP-NEXT:    ; return to shader part epilog
  %result = srem i32 %num, %den
  %readlane = call i32 @llvm.amdgcn.readfirstlane(i32 %result)
  ret i32 %readlane
}

; <2 x i32> form of the variable-operand case. In the checks the numerator
; elements are v0/v1 and the denominator elements are v2/v3, with the two
; per-element sequences interleaved.
define <2 x i32> @v_srem_v2i32(<2 x i32> %num, <2 x i32> %den) {
; GISEL-LABEL: v_srem_v2i32:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
; GISEL-NEXT:    v_ashrrev_i32_e32 v5, 31, v2
; GISEL-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
; GISEL-NEXT:    v_ashrrev_i32_e32 v7, 31, v3
; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v6
; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v5
; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v6
; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v7
; GISEL-NEXT:    v_cvt_f32_u32_e32 v5, v2
; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, 0, v2
; GISEL-NEXT:    v_cvt_f32_u32_e32 v8, v3
; GISEL-NEXT:    v_sub_i32_e32 v9, vcc, 0, v3
; GISEL-NEXT:    v_rcp_iflag_f32_e32 v5, v5
; GISEL-NEXT:    v_rcp_iflag_f32_e32 v8, v8
; GISEL-NEXT:    v_mul_f32_e32 v5, 0x4f7ffffe, v5
; GISEL-NEXT:    v_mul_f32_e32 v8, 0x4f7ffffe, v8
; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
; GISEL-NEXT:    v_cvt_u32_f32_e32 v8, v8
; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v5
; GISEL-NEXT:    v_mul_lo_u32 v9, v9, v8
; GISEL-NEXT:    v_mul_hi_u32 v7, v5, v7
; GISEL-NEXT:    v_mul_hi_u32 v9, v8, v9
; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v8, v9
; GISEL-NEXT:    v_mul_hi_u32 v5, v0, v5
; GISEL-NEXT:    v_mul_hi_u32 v7, v1, v7
; GISEL-NEXT:    v_mul_lo_u32 v5, v5, v2
; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v3
; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v7
; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, v0, v2
; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, v1, v3
; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, v0, v2
; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, v1, v3
; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v6
; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v6
; GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_srem_v2i32:
; CGP:       ; %bb.0:
; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CGP-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
; CGP-NEXT:    v_ashrrev_i32_e32 v5, 31, v2
; CGP-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
; CGP-NEXT:    v_ashrrev_i32_e32 v7, 31, v3
; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v6
; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
; CGP-NEXT:    v_xor_b32_e32 v0, v0, v4
; CGP-NEXT:    v_xor_b32_e32 v2, v2, v5
; CGP-NEXT:    v_xor_b32_e32 v1, v1, v6
; CGP-NEXT:    v_xor_b32_e32 v3, v3, v7
; CGP-NEXT:    v_cvt_f32_u32_e32 v5, v2
; CGP-NEXT:    v_sub_i32_e32 v7, vcc, 0, v2
; CGP-NEXT:    v_cvt_f32_u32_e32 v8, v3
; CGP-NEXT:    v_sub_i32_e32 v9, vcc, 0, v3
; CGP-NEXT:    v_rcp_f32_e32 v5, v5
; CGP-NEXT:    v_rcp_f32_e32 v8, v8
; CGP-NEXT:    v_mul_f32_e32 v5, 0x4f7ffffe, v5
; CGP-NEXT:    v_mul_f32_e32 v8, 0x4f7ffffe, v8
; CGP-NEXT:    v_cvt_u32_f32_e32 v5, v5
; CGP-NEXT:    v_cvt_u32_f32_e32 v8, v8
; CGP-NEXT:    v_mul_lo_u32 v7, v7, v5
; CGP-NEXT:    v_mul_lo_u32 v9, v9, v8
; CGP-NEXT:    v_mul_lo_u32 v10, 0, v7
; CGP-NEXT:    v_mul_hi_u32 v7, v5, v7
; CGP-NEXT:    v_mul_lo_u32 v11, 0, v9
; CGP-NEXT:    v_mul_hi_u32 v9, v8, v9
; CGP-NEXT:    v_add_i32_e32 v7, vcc, v10, v7
; CGP-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
; CGP-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
; CGP-NEXT:    v_add_i32_e32 v7, vcc, v8, v9
; CGP-NEXT:    v_mul_lo_u32 v8, 0, v5
; CGP-NEXT:    v_mul_hi_u32 v5, v0, v5
; CGP-NEXT:    v_mul_lo_u32 v9, 0, v7
; CGP-NEXT:    v_mul_hi_u32 v7, v1, v7
; CGP-NEXT:    v_add_i32_e32 v5, vcc, v8, v5
; CGP-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
; CGP-NEXT:    v_mul_lo_u32 v5, v5, v2
; CGP-NEXT:    v_mul_lo_u32 v7, v7, v3
; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v7
; CGP-NEXT:    v_sub_i32_e32 v5, vcc, v0, v2
; CGP-NEXT:    v_sub_i32_e32 v7, vcc, v1, v3
; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
; CGP-NEXT:    v_sub_i32_e32 v5, vcc, v0, v2
; CGP-NEXT:    v_sub_i32_e32 v7, vcc, v1, v3
; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
; CGP-NEXT:    v_xor_b32_e32 v0, v0, v4
; CGP-NEXT:    v_xor_b32_e32 v1, v1, v6
; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v6
; CGP-NEXT:    s_setpc_b64 s[30:31]
  %result = srem <2 x i32> %num, %den
  ret <2 x i32> %result
}

; Constant denominator 4096 (0x1000). Both runs share one set of checks; the
; multiply by the denominator shows up as "v_lshlrev_b32_e32 v2, 12, v2".
define i32 @v_srem_i32_pow2k_denom(i32 %num) {
; CHECK-LABEL: v_srem_i32_pow2k_denom:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    s_movk_i32 s4, 0x1000
; CHECK-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
; CHECK-NEXT:    v_mov_b32_e32 v2, 0xfffff000
; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
; CHECK-NEXT:    v_cvt_f32_u32_e32 v3, s4
; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v1
; CHECK-NEXT:    v_rcp_iflag_f32_e32 v3, v3
; CHECK-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
; CHECK-NEXT:    v_cvt_u32_f32_e32 v3, v3
; CHECK-NEXT:    v_mul_lo_u32 v2, v2, v3
; CHECK-NEXT:    v_mul_hi_u32 v2, v3, v2
; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
; CHECK-NEXT:    v_mul_hi_u32 v2, v0, v2
; CHECK-NEXT:    v_lshlrev_b32_e32 v2, 12, v2
; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
; CHECK-NEXT:    v_subrev_i32_e32 v2, vcc, s4, v0
; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; CHECK-NEXT:    v_subrev_i32_e32 v2, vcc, s4, v0
; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v1
; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
; CHECK-NEXT:    s_setpc_b64 s[30:31]
  %result = srem i32 %num, 4096
  ret i32 %result
}

; Splat constant denominator 4096. The runs have separate checks here: the
; GISEL sequence multiplies back with "v_mul_lo_u32 ..., s4", while the CGP
; sequence uses "v_lshlrev_b32_e32 ..., 12, ...".
define <2 x i32> @v_srem_v2i32_pow2k_denom(<2 x i32> %num) {
; GISEL-LABEL: v_srem_v2i32_pow2k_denom:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
; GISEL-NEXT:    s_add_i32 s4, 0x1000, 0
; GISEL-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, s4
; GISEL-NEXT:    s_sub_i32 s5, 0, s4
; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v3
; GISEL-NEXT:    v_mul_f32_e32 v5, 0x4f7ffffe, v4
; GISEL-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
; GISEL-NEXT:    v_mul_lo_u32 v6, s5, v5
; GISEL-NEXT:    v_mul_lo_u32 v7, s5, v4
; GISEL-NEXT:    v_mul_hi_u32 v6, v5, v6
; GISEL-NEXT:    v_mul_hi_u32 v7, v4, v7
; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
; GISEL-NEXT:    v_mul_hi_u32 v5, v0, v5
; GISEL-NEXT:    v_mul_hi_u32 v4, v1, v4
; GISEL-NEXT:    v_mul_lo_u32 v5, v5, s4
; GISEL-NEXT:    v_mul_lo_u32 v4, v4, s4
; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v4
; GISEL-NEXT:    v_subrev_i32_e32 v4, vcc, s4, v0
; GISEL-NEXT:    v_subrev_i32_e32 v5, vcc, s4, v1
; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s4, v1
; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
; GISEL-NEXT:    v_subrev_i32_e32 v4, vcc, s4, v0
; GISEL-NEXT:    v_subrev_i32_e32 v5, vcc, s4, v1
; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s4, v1
; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v3
; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v3
; GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_srem_v2i32_pow2k_denom:
; CGP:       ; %bb.0:
; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CGP-NEXT:    s_movk_i32 s4, 0x1000
; CGP-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
; CGP-NEXT:    v_mov_b32_e32 v3, 0x1000
; CGP-NEXT:    s_movk_i32 s5, 0xf000
; CGP-NEXT:    v_mov_b32_e32 v4, 0xfffff000
; CGP-NEXT:    v_ashrrev_i32_e32 v5, 31, v1
; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
; CGP-NEXT:    v_cvt_f32_u32_e32 v6, s4
; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v5
; CGP-NEXT:    v_cvt_f32_u32_e32 v7, v3
; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
; CGP-NEXT:    v_rcp_iflag_f32_e32 v6, v6
; CGP-NEXT:    v_xor_b32_e32 v1, v1, v5
; CGP-NEXT:    v_rcp_iflag_f32_e32 v7, v7
; CGP-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
; CGP-NEXT:    v_mul_f32_e32 v7, 0x4f7ffffe, v7
; CGP-NEXT:    v_cvt_u32_f32_e32 v6, v6
; CGP-NEXT:    v_cvt_u32_f32_e32 v7, v7
; CGP-NEXT:    v_mul_lo_u32 v8, s5, v6
; CGP-NEXT:    v_mul_lo_u32 v4, v4, v7
; CGP-NEXT:    v_mul_hi_u32 v8, v6, v8
; CGP-NEXT:    v_mul_hi_u32 v4, v7, v4
; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
; CGP-NEXT:    v_add_i32_e32 v4, vcc, v7, v4
; CGP-NEXT:    v_mul_hi_u32 v6, v0, v6
; CGP-NEXT:    v_mul_hi_u32 v4, v1, v4
; CGP-NEXT:    v_lshlrev_b32_e32 v6, 12, v6
; CGP-NEXT:    v_lshlrev_b32_e32 v4, 12, v4
; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v4
; CGP-NEXT:    v_subrev_i32_e32 v4, vcc, s4, v0
; CGP-NEXT:    v_sub_i32_e32 v6, vcc, v1, v3
; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
; CGP-NEXT:    v_subrev_i32_e32 v4, vcc, s4, v0
; CGP-NEXT:    v_sub_i32_e32 v6, vcc, v1, v3
; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
; CGP-NEXT:    v_xor_b32_e32 v1, v1, v5
; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v5
; CGP-NEXT:    s_setpc_b64 s[30:31]
  %result = srem <2 x i32> %num, <i32 4096, i32 4096>
  ret <2 x i32> %result
}

; Constant denominator 1235195 (0x12d8fb in the checks); both runs share one
; set of checks.
define i32 @v_srem_i32_oddk_denom(i32 %num) {
; CHECK-LABEL: v_srem_i32_oddk_denom:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    s_mov_b32 s4, 0x12d8fb
; CHECK-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
; CHECK-NEXT:    v_mov_b32_e32 v2, 0xffed2705
; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
; CHECK-NEXT:    v_cvt_f32_u32_e32 v3, s4
; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v1
; CHECK-NEXT:    v_rcp_iflag_f32_e32 v3, v3
; CHECK-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
; CHECK-NEXT:    v_cvt_u32_f32_e32 v3, v3
; CHECK-NEXT:    v_mul_lo_u32 v2, v2, v3
; CHECK-NEXT:    v_mul_hi_u32 v2, v3, v2
; CHECK-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
; CHECK-NEXT:    v_mul_hi_u32 v2, v0, v2
; CHECK-NEXT:    v_mul_lo_u32 v2, v2, s4
; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
; CHECK-NEXT:    v_subrev_i32_e32 v2, vcc, s4, v0
; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; CHECK-NEXT:    v_subrev_i32_e32 v2, vcc, s4, v0
; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v1
; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
; CHECK-NEXT:    s_setpc_b64 s[30:31]
  %result = srem i32 %num, 1235195
  ret i32 %result
}

; Splat constant denominator 1235195; the two runs have separate checks.
define <2 x i32> @v_srem_v2i32_oddk_denom(<2 x i32> %num) {
; GISEL-LABEL: v_srem_v2i32_oddk_denom:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
; GISEL-NEXT:    s_add_i32 s4, 0x12d8fb, 0
; GISEL-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, s4
; GISEL-NEXT:    s_sub_i32 s5, 0, s4
; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v3
; GISEL-NEXT:    v_mul_f32_e32 v5, 0x4f7ffffe, v4
; GISEL-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
; GISEL-NEXT:    v_mul_lo_u32 v6, s5, v5
; GISEL-NEXT:    v_mul_lo_u32 v7, s5, v4
; GISEL-NEXT:    v_mul_hi_u32 v6, v5, v6
; GISEL-NEXT:    v_mul_hi_u32 v7, v4, v7
; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
; GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
; GISEL-NEXT:    v_mul_hi_u32 v5, v0, v5
; GISEL-NEXT:    v_mul_hi_u32 v4, v1, v4
; GISEL-NEXT:    v_mul_lo_u32 v5, v5, s4
; GISEL-NEXT:    v_mul_lo_u32 v4, v4, s4
; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v4
; GISEL-NEXT:    v_subrev_i32_e32 v4, vcc, s4, v0
; GISEL-NEXT:    v_subrev_i32_e32 v5, vcc, s4, v1
; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s4, v1
; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
; GISEL-NEXT:    v_subrev_i32_e32 v4, vcc, s4, v0
; GISEL-NEXT:    v_subrev_i32_e32 v5, vcc, s4, v1
; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s4, v1
; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v3
; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v3
; GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_srem_v2i32_oddk_denom:
; CGP:       ; %bb.0:
; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CGP-NEXT:    s_mov_b32 s4, 0x12d8fb
; CGP-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
; CGP-NEXT:    v_mov_b32_e32 v3, 0x12d8fb
; CGP-NEXT:    s_mov_b32 s5, 0xffed2705
; CGP-NEXT:    v_mov_b32_e32 v4, 0xffed2705
; CGP-NEXT:    v_ashrrev_i32_e32 v5, 31, v1
; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
; CGP-NEXT:    v_cvt_f32_u32_e32 v6, s4
; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v5
; CGP-NEXT:    v_cvt_f32_u32_e32 v7, v3
; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
; CGP-NEXT:    v_rcp_iflag_f32_e32 v6, v6
; CGP-NEXT:    v_xor_b32_e32 v1, v1, v5
; CGP-NEXT:    v_rcp_iflag_f32_e32 v7, v7
; CGP-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
; CGP-NEXT:    v_mul_f32_e32 v7, 0x4f7ffffe, v7
; CGP-NEXT:    v_cvt_u32_f32_e32 v6, v6
; CGP-NEXT:    v_cvt_u32_f32_e32 v7, v7
; CGP-NEXT:    v_mul_lo_u32 v8, s5, v6
; CGP-NEXT:    v_mul_lo_u32 v4, v4, v7
; CGP-NEXT:    v_mul_hi_u32 v8, v6, v8
; CGP-NEXT:    v_mul_hi_u32 v4, v7, v4
; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
; CGP-NEXT:    v_add_i32_e32 v4, vcc, v7, v4
; CGP-NEXT:    v_mul_hi_u32 v6, v0, v6
; CGP-NEXT:    v_mul_hi_u32 v4, v1, v4
; CGP-NEXT:    v_mul_lo_u32 v6, v6, s4
; CGP-NEXT:    v_mul_lo_u32 v4, v4, v3
; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v4
; CGP-NEXT:    v_subrev_i32_e32 v4, vcc, s4, v0
; CGP-NEXT:    v_sub_i32_e32 v6, vcc, v1, v3
; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
; CGP-NEXT:    v_subrev_i32_e32 v4, vcc, s4, v0
; CGP-NEXT:    v_sub_i32_e32 v6, vcc, v1, v3
; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
; CGP-NEXT:    v_xor_b32_e32 v1, v1, v5
; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v5
; CGP-NEXT:    s_setpc_b64 s[30:31]
  %result = srem <2 x i32> %num, <i32 1235195, i32 1235195>
  ret <2 x i32> %result
}

; The denominator is 4096 shifted left by the variable %y, so it is computed
; at run time ("v_lshl_b32_e32 v1, 0x1000, v1"); both runs share one set of
; checks.
define i32 @v_srem_i32_pow2_shl_denom(i32 %x, i32 %y) {
; CHECK-LABEL: v_srem_i32_pow2_shl_denom:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    v_lshl_b32_e32 v1, 0x1000, v1
; CHECK-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
; CHECK-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v2
; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v3
; CHECK-NEXT:    v_cvt_f32_u32_e32 v3, v1
; CHECK-NEXT:    v_sub_i32_e32 v4, vcc, 0, v1
; CHECK-NEXT:    v_rcp_iflag_f32_e32 v3, v3
; CHECK-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
; CHECK-NEXT:    v_cvt_u32_f32_e32 v3, v3
; CHECK-NEXT:    v_mul_lo_u32 v4, v4, v3
; CHECK-NEXT:    v_mul_hi_u32 v4, v3, v4
; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
; CHECK-NEXT:    v_mul_hi_u32 v3, v0, v3
; CHECK-NEXT:    v_mul_lo_u32 v3, v3, v1
; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
; CHECK-NEXT:    v_sub_i32_e32 v3, vcc, v0, v1
; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
; CHECK-NEXT:    v_sub_i32_e32 v3, vcc, v0, v1
; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v2
; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
; CHECK-NEXT:    s_setpc_b64 s[30:31]
  %shl.y = shl i32 4096, %y
  %r = srem i32 %x, %shl.y
  ret i32 %r
}

; <2 x i32> form of the shifted-denominator case; the two runs have separate
; checks.
define <2 x i32> @v_srem_v2i32_pow2_shl_denom(<2 x i32> %x, <2 x i32> %y) {
; GISEL-LABEL: v_srem_v2i32_pow2_shl_denom:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    s_movk_i32 s4, 0x1000
; GISEL-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
; GISEL-NEXT:    v_ashrrev_i32_e32 v5, 31, v1
; GISEL-NEXT:    v_lshl_b32_e32 v2, s4, v2
; GISEL-NEXT:    v_lshl_b32_e32 v3, s4, v3
; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v5
; GISEL-NEXT:    v_ashrrev_i32_e32 v6, 31, v2
; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
; GISEL-NEXT:    v_ashrrev_i32_e32 v7, 31, v3
; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v5
; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v6
; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v7
; GISEL-NEXT:    v_cvt_f32_u32_e32 v6, v2
; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, 0, v2
; GISEL-NEXT:    v_cvt_f32_u32_e32 v8, v3
; GISEL-NEXT:    v_sub_i32_e32 v9, vcc, 0, v3
; GISEL-NEXT:    v_rcp_iflag_f32_e32 v6, v6
; GISEL-NEXT:    v_rcp_iflag_f32_e32 v8, v8
; GISEL-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
; GISEL-NEXT:    v_mul_f32_e32 v8, 0x4f7ffffe, v8
; GISEL-NEXT:    v_cvt_u32_f32_e32 v6, v6
; GISEL-NEXT:    v_cvt_u32_f32_e32 v8, v8
; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v6
; GISEL-NEXT:    v_mul_lo_u32 v9, v9, v8
; GISEL-NEXT:    v_mul_hi_u32 v7, v6, v7
; GISEL-NEXT:    v_mul_hi_u32 v9, v8, v9
; GISEL-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v8, v9
; GISEL-NEXT:    v_mul_hi_u32 v6, v0, v6
; GISEL-NEXT:    v_mul_hi_u32 v7, v1, v7
; GISEL-NEXT:    v_mul_lo_u32 v6, v6, v2
; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v3
; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v7
; GISEL-NEXT:    v_sub_i32_e32 v6, vcc, v0, v2
; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, v1, v3
; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
; GISEL-NEXT:    v_sub_i32_e32 v6, vcc, v0, v2
; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, v1, v3
; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v5
; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v5
; GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_srem_v2i32_pow2_shl_denom:
; CGP:       ; %bb.0:
; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CGP-NEXT:    s_movk_i32 s4, 0x1000
; CGP-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
; CGP-NEXT:    v_ashrrev_i32_e32 v5, 31, v1
; CGP-NEXT:    v_lshl_b32_e32 v2, s4, v2
; CGP-NEXT:    v_lshl_b32_e32 v3, s4, v3
; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v5
; CGP-NEXT:    v_ashrrev_i32_e32 v6, 31, v2
; CGP-NEXT:    v_xor_b32_e32 v0, v0, v4
; CGP-NEXT:    v_ashrrev_i32_e32 v7, 31, v3
; CGP-NEXT:    v_xor_b32_e32 v1, v1, v5
; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
; CGP-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
; CGP-NEXT:    v_xor_b32_e32 v2, v2, v6
; CGP-NEXT:    v_xor_b32_e32 v3, v3, v7
; CGP-NEXT:    v_cvt_f32_u32_e32 v6, v2
; CGP-NEXT:    v_sub_i32_e32 v7, vcc, 0, v2
; CGP-NEXT:    v_cvt_f32_u32_e32 v8, v3
; CGP-NEXT:    v_sub_i32_e32 v9, vcc, 0, v3
; CGP-NEXT:    v_rcp_f32_e32 v6, v6
; CGP-NEXT:    v_rcp_f32_e32 v8, v8
; CGP-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
; CGP-NEXT:    v_mul_f32_e32 v8, 0x4f7ffffe, v8
; CGP-NEXT:    v_cvt_u32_f32_e32 v6, v6
; CGP-NEXT:    v_cvt_u32_f32_e32 v8, v8
; CGP-NEXT:    v_mul_lo_u32 v7, v7, v6
; CGP-NEXT:    v_mul_lo_u32 v9, v9, v8
; CGP-NEXT:    v_mul_lo_u32 v10, 0, v7
; CGP-NEXT:    v_mul_hi_u32 v7, v6, v7
; CGP-NEXT:    v_mul_lo_u32 v11, 0, v9
; CGP-NEXT:    v_mul_hi_u32 v9, v8, v9
; CGP-NEXT:    v_add_i32_e32 v7, vcc, v10, v7
; CGP-NEXT:    v_add_i32_e32 v9, vcc, v11, v9
; CGP-NEXT:    v_add_i32_e32 v6, vcc, v6, v7
; CGP-NEXT:    v_add_i32_e32 v7, vcc, v8, v9
; CGP-NEXT:    v_mul_lo_u32 v8, 0, v6
; CGP-NEXT:    v_mul_hi_u32 v6, v0, v6
; CGP-NEXT:    v_mul_lo_u32 v9, 0, v7
; CGP-NEXT:    v_mul_hi_u32 v7, v1, v7
; CGP-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
; CGP-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
; CGP-NEXT:    v_mul_lo_u32 v6, v6, v2
; CGP-NEXT:    v_mul_lo_u32 v7, v7, v3
; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v7
; CGP-NEXT:    v_sub_i32_e32 v6, vcc, v0, v2
; CGP-NEXT:    v_sub_i32_e32 v7, vcc, v1, v3
; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
; CGP-NEXT:    v_sub_i32_e32 v6, vcc, v0, v2
; CGP-NEXT:    v_sub_i32_e32 v7, vcc, v1, v3
; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
; CGP-NEXT:    v_xor_b32_e32 v0, v0, v4
; CGP-NEXT:    v_xor_b32_e32 v1, v1, v5
; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v5
; CGP-NEXT:    s_setpc_b64 s[30:31]
  %shl.y = shl <2 x i32> <i32 4096, i32 4096>, %y
  %r = srem <2 x i32> %x, %shl.y
  ret <2 x i32> %r
}

; Both operands are masked to their low 24 bits (and with 16777215, i.e.
; 0xffffff) before the srem. The CGP sequence contains no v_ashrrev/v_xor
; sign handling at all, whereas the GISEL sequence still has it.
define i32 @v_srem_i32_24bit(i32 %num, i32 %den) {
; GISEL-LABEL: v_srem_i32_24bit:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    s_mov_b32 s4, 0xffffff
; GISEL-NEXT:    v_and_b32_e32 v0, s4, v0
; GISEL-NEXT:    v_and_b32_e32 v1, s4, v1
; GISEL-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
; GISEL-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v3
; GISEL-NEXT:    v_cvt_f32_u32_e32 v3, v1
; GISEL-NEXT:    v_sub_i32_e32 v4, vcc, 0, v1
; GISEL-NEXT:    v_rcp_iflag_f32_e32 v3, v3
; GISEL-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
; GISEL-NEXT:    v_cvt_u32_f32_e32 v3, v3
; GISEL-NEXT:    v_mul_lo_u32 v4, v4, v3
; GISEL-NEXT:    v_mul_hi_u32 v4, v3, v4
; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
; GISEL-NEXT:    v_mul_hi_u32 v3, v0, v3
; GISEL-NEXT:    v_mul_lo_u32 v3, v3, v1
; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, v0, v1
; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, v0, v1
; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
; GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_srem_i32_24bit:
; CGP:       ; %bb.0:
; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CGP-NEXT:    s_mov_b32 s4, 0xffffff
; CGP-NEXT:    v_and_b32_e32 v0, s4, v0
; CGP-NEXT:    v_and_b32_e32 v1, s4, v1
; CGP-NEXT:    v_cvt_f32_u32_e32 v2, v1
; CGP-NEXT:    v_sub_i32_e32 v3, vcc, 0, v1
; CGP-NEXT:    v_rcp_f32_e32 v2, v2
; CGP-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
; CGP-NEXT:    v_cvt_u32_f32_e32 v2, v2
; CGP-NEXT:    v_mul_lo_u32 v3, v3, v2
; CGP-NEXT:    v_mul_lo_u32 v4, 0, v3
; CGP-NEXT:    v_mul_hi_u32 v3, v2, v3
; CGP-NEXT:    v_add_i32_e32 v3, vcc, v4, v3
; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
; CGP-NEXT:    v_mul_lo_u32 v3, 0, v2
; CGP-NEXT:    v_mul_hi_u32 v2, v0, v2
; CGP-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
; CGP-NEXT:    v_mul_lo_u32 v2, v2, v1
; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v0, v1
; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v0, v1
; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; CGP-NEXT:    s_setpc_b64 s[30:31]
  %num.mask = and i32 %num, 16777215
  %den.mask = and i32 %den, 16777215
  %result = srem i32 %num.mask, %den.mask
  ret i32 %result
}

define <2 x i32> @v_srem_v2i32_24bit(<2 x i32> %num, <2 x i32> %den) {
; GISEL-LABEL: v_srem_v2i32_24bit:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    s_mov_b32 s4, 0xffffff
; GISEL-NEXT:    v_and_b32_e32 v0, s4, v0
; GISEL-NEXT:    v_and_b32_e32 v1, s4, v1
; GISEL-NEXT:    v_and_b32_e32 v2, s4, v2
; GISEL-NEXT:    v_and_b32_e32 v3, s4, v3
; GISEL-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
; GISEL-NEXT:    v_ashrrev_i32_e32 v5, 31, v2
; GISEL-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
; GISEL-NEXT:    v_ashrrev_i32_e32 v7, 31, v3
; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v6
; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v5
; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v6
; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v7
; GISEL-NEXT:    v_cvt_f32_u32_e32 v5, v2
; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, 0, v2
; GISEL-NEXT:    v_cvt_f32_u32_e32 v8, v3
; GISEL-NEXT:    v_sub_i32_e32 v9, vcc, 0, v3
; GISEL-NEXT:    v_rcp_iflag_f32_e32 v5, v5
; GISEL-NEXT:    v_rcp_iflag_f32_e32 v8, v8
; GISEL-NEXT:    v_mul_f32_e32 v5, 0x4f7ffffe, v5
; GISEL-NEXT:    v_mul_f32_e32 v8, 0x4f7ffffe, v8
; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
; GISEL-NEXT:    v_cvt_u32_f32_e32 v8, v8
; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v5
; GISEL-NEXT:    v_mul_lo_u32 v9, v9, v8
; GISEL-NEXT:    v_mul_hi_u32 v7, v5, v7
; GISEL-NEXT:    v_mul_hi_u32 v9, v8, v9
; GISEL-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
; GISEL-NEXT:    v_add_i32_e32 v7, vcc, v8, v9
; GISEL-NEXT:    v_mul_hi_u32 v5, v0, v5
; GISEL-NEXT:    v_mul_hi_u32 v7, v1, v7
; GISEL-NEXT:    v_mul_lo_u32 v5, v5, v2
; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v3
; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v7
; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, v0, v2
; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, v1, v3
; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
; GISEL-NEXT:    v_sub_i32_e32 v5, vcc, v0, v2
; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, v1, v3
; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v6
; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v6
; GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; CGP-LABEL: v_srem_v2i32_24bit:
; CGP:       ; %bb.0:
; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CGP-NEXT:    s_mov_b32 s4, 0xffffff
; CGP-NEXT:    v_and_b32_e32 v0, s4, v0
; CGP-NEXT:    v_and_b32_e32 v1, s4, v1
; CGP-NEXT:    v_and_b32_e32 v2, s4, v2
; CGP-NEXT:    v_and_b32_e32 v3, s4, v3
; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v2
; CGP-NEXT:    v_sub_i32_e32 v5, vcc, 0, v2
; CGP-NEXT:    v_cvt_f32_u32_e32 v6, v3
; CGP-NEXT:    v_sub_i32_e32 v7, vcc, 0, v3
; CGP-NEXT:    v_rcp_f32_e32 v4, v4
; CGP-NEXT:    v_rcp_f32_e32 v6, v6
; CGP-NEXT:    v_mul_f32_e32 v4, 0x4f7ffffe, v4
; CGP-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
; CGP-NEXT:    v_cvt_u32_f32_e32 v6, v6
; CGP-NEXT:    v_mul_lo_u32 v5, v5, v4
; CGP-NEXT:    v_mul_lo_u32 v7, v7, v6
; CGP-NEXT:    v_mul_lo_u32 v8, 0, v5
; CGP-NEXT:    v_mul_hi_u32 v5, v4, v5
; CGP-NEXT:    v_mul_lo_u32 v9, 0, v7
; CGP-NEXT:    v_mul_hi_u32 v7, v6, v7
; CGP-NEXT:    v_add_i32_e32 v5, vcc, v8, v5
; CGP-NEXT:    v_add_i32_e32 v7, vcc, v9, v7
; CGP-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
; CGP-NEXT:    v_add_i32_e32 v5, vcc, v6, v7
; CGP-NEXT:    v_mul_lo_u32 v6, 0, v4
; CGP-NEXT:    v_mul_hi_u32 v4, v0, v4
; CGP-NEXT:    v_mul_lo_u32 v7, 0, v5
; CGP-NEXT:    v_mul_hi_u32 v5, v1, v5
; CGP-NEXT:    v_add_i32_e32 v4, vcc, v6, v4
; CGP-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
; CGP-NEXT:    v_mul_lo_u32 v4, v4, v2
; CGP-NEXT:    v_mul_lo_u32 v5, v5, v3
; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v5
; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v0, v2
; CGP-NEXT:    v_sub_i32_e32 v5, vcc, v1, v3
; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v0, v2
; CGP-NEXT:    v_sub_i32_e32 v5, vcc, v1, v3
; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
; CGP-NEXT:    s_setpc_b64 s[30:31]
%num.mask = and <2 x i32> %num, <i32 16777215, i32 16777215> 883 %den.mask = and <2 x i32> %den, <i32 16777215, i32 16777215> 884 %result = srem <2 x i32> %num.mask, %den.mask 885 ret <2 x i32> %result 886} 887