; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx704 < %s | FileCheck -check-prefix=GFX7 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s

; Overview
; --------
; Each function below is compiled for three subtargets (gfx704, gfx900 and
; gfx1010) and the complete instruction sequence is pinned by the generated
; check lines. Do not edit the check lines by hand; regenerate them with the
; script named in the first line of this file whenever the IR changes.
;
; Both functions build the same chain: an add, an unsigned-overflow test of
; that add, the overflow bit zero-extended and added to the input again, and
; an unsigned-overflow test of the second add. Judging by the function names
; and the s_addc_u32 in the expected output, the point of interest is how the
; second overflow bit is consumed: by a select in the first function and by a
; conditional branch in the second. (Intent inferred; confirm against the
; commit that introduced this test.)

; Second overflow bit is consumed by a select. The input comes from a volatile
; load through a null addrspace(4) pointer; the result is returned as i32.
define i32 @s_add_co_select_user() {
; GFX7-LABEL: s_add_co_select_user:
; GFX7:       ; %bb.0: ; %bb
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b64 s[4:5], 0
; GFX7-NEXT:    s_load_dword s6, s[4:5], 0x0
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_add_i32_e64 v0, s[4:5], s6, s6
; GFX7-NEXT:    s_or_b32 s4, s4, s5
; GFX7-NEXT:    s_cmp_lg_u32 s4, 0
; GFX7-NEXT:    s_addc_u32 s4, s6, 0
; GFX7-NEXT:    v_mov_b32_e32 v1, s4
; GFX7-NEXT:    s_cselect_b64 vcc, 1, 0
; GFX7-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
; GFX7-NEXT:    v_cmp_gt_u32_e64 vcc, s6, 31
; GFX7-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_add_co_select_user:
; GFX9:       ; %bb.0: ; %bb
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_mov_b64 s[4:5], 0
; GFX9-NEXT:    s_load_dword s6, s[4:5], 0x0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e64 v0, s[4:5], s6, s6
; GFX9-NEXT:    s_cmp_lg_u64 s[4:5], 0
; GFX9-NEXT:    s_addc_u32 s4, s6, 0
; GFX9-NEXT:    s_cselect_b64 vcc, 1, 0
; GFX9-NEXT:    v_mov_b32_e32 v1, s4
; GFX9-NEXT:    s_cmp_gt_u32 s6, 31
; GFX9-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
; GFX9-NEXT:    s_cselect_b64 vcc, -1, 0
; GFX9-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_add_co_select_user:
; GFX10:       ; %bb.0: ; %bb
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    s_mov_b64 s[4:5], 0
; GFX10-NEXT:    s_load_dword s4, s[4:5], 0x0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_add_co_u32_e64 v0, s5, s4, s4
; GFX10-NEXT:    s_cmpk_lg_u32 s5, 0x0
; GFX10-NEXT:    s_addc_u32 s5, s4, 0
; GFX10-NEXT:    s_cselect_b32 s6, 1, 0
; GFX10-NEXT:    s_cmp_gt_u32 s4, 31
; GFX10-NEXT:    v_cndmask_b32_e64 v1, 0, s5, s6
; GFX10-NEXT:    s_cselect_b32 vcc_lo, -1, 0
; GFX10-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; GFX10-NEXT:    s_setpc_b64 s[30:31]
bb:
  %i = load volatile i32, i32 addrspace(4)* null, align 8 ; input value
  %i1 = add i32 %i, %i                                    ; first add
  %i2 = icmp ult i32 %i1, %i                              ; true if the first add wrapped (unsigned)
  %i3 = zext i1 %i2 to i32                                ; overflow bit as 0 or 1
  %i4 = add nuw nsw i32 %i3, 0                            ; adds zero; same value as %i3
  %i5 = add i32 %i4, %i                                   ; second add: overflow bit + input
  %i6 = icmp ult i32 %i5, %i4                             ; true if the second add wrapped (unsigned)
  %i7 = select i1 %i6, i32 %i5, i32 0                     ; select user of the second overflow bit
  %i8 = icmp ugt i32 %i, 31                               ; unsigned input > 31
  %i9 = select i1 %i8, i32 %i1, i32 %i7                   ; return first sum if input > 31, else %i7
  ret i32 %i9
}

; Second overflow bit is consumed by a conditional branch. The input is the
; kernel argument %i. Block %bb0 stores 9 and falls into %bb1, which stores 10;
; both stores are volatile and go through a null addrspace(1) pointer.
define amdgpu_kernel void @s_add_co_br_user(i32 %i) {
; GFX7-LABEL: s_add_co_br_user:
; GFX7:       ; %bb.0: ; %bb
; GFX7-NEXT:    s_load_dword s0, s[4:5], 0x0
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    s_add_i32 s1, s0, s0
; GFX7-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-NEXT:    v_cmp_lt_u32_e32 vcc, s1, v0
; GFX7-NEXT:    s_or_b32 s1, vcc_lo, vcc_hi
; GFX7-NEXT:    s_cmp_lg_u32 s1, 0
; GFX7-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX7-NEXT:    s_addc_u32 s0, s0, 0
; GFX7-NEXT:    v_cmp_ge_u32_e32 vcc, s0, v0
; GFX7-NEXT:    s_and_b64 vcc, exec, vcc
; GFX7-NEXT:    s_cbranch_vccnz BB1_2
; GFX7-NEXT:  ; %bb.1: ; %bb0
; GFX7-NEXT:    v_mov_b32_e32 v0, 0
; GFX7-NEXT:    v_mov_b32_e32 v2, 9
; GFX7-NEXT:    v_mov_b32_e32 v1, 0
; GFX7-NEXT:    flat_store_dword v[0:1], v2
; GFX7-NEXT:  BB1_2: ; %bb1
; GFX7-NEXT:    v_mov_b32_e32 v0, 0
; GFX7-NEXT:    v_mov_b32_e32 v2, 10
; GFX7-NEXT:    v_mov_b32_e32 v1, 0
; GFX7-NEXT:    flat_store_dword v[0:1], v2
; GFX7-NEXT:    s_endpgm
;
; GFX9-LABEL: s_add_co_br_user:
; GFX9:       ; %bb.0: ; %bb
; GFX9-NEXT:    s_load_dword s0, s[4:5], 0x0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_add_i32 s1, s0, s0
; GFX9-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s1, v0
; GFX9-NEXT:    s_cmp_lg_u64 vcc, 0
; GFX9-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX9-NEXT:    s_addc_u32 s0, s0, 0
; GFX9-NEXT:    v_cmp_ge_u32_e32 vcc, s0, v0
; GFX9-NEXT:    s_and_b64 vcc, exec, vcc
; GFX9-NEXT:    s_cbranch_vccnz BB1_2
; GFX9-NEXT:  ; %bb.1: ; %bb0
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    v_mov_b32_e32 v2, 9
; GFX9-NEXT:    v_mov_b32_e32 v1, 0
; GFX9-NEXT:    global_store_dword v[0:1], v2, off
; GFX9-NEXT:  BB1_2: ; %bb1
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    v_mov_b32_e32 v2, 10
; GFX9-NEXT:    v_mov_b32_e32 v1, 0
; GFX9-NEXT:    global_store_dword v[0:1], v2, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: s_add_co_br_user:
; GFX10:       ; %bb.0: ; %bb
; GFX10-NEXT:    s_load_dword s0, s[4:5], 0x0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_add_i32 s1, s0, s0
; GFX10-NEXT:    v_cmp_lt_u32_e64 s1, s1, s0
; GFX10-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s1
; GFX10-NEXT:    s_cmpk_lg_u32 s1, 0x0
; GFX10-NEXT:    s_addc_u32 s0, s0, 0
; GFX10-NEXT:    v_cmp_ge_u32_e32 vcc_lo, s0, v0
; GFX10-NEXT:    s_and_b32 vcc_lo, exec_lo, vcc_lo
; GFX10-NEXT:    s_cbranch_vccnz BB1_2
; GFX10-NEXT:  ; %bb.1: ; %bb0
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    v_mov_b32_e32 v2, 9
; GFX10-NEXT:    v_mov_b32_e32 v1, 0
; GFX10-NEXT:    global_store_dword v[0:1], v2, off
; GFX10-NEXT:  BB1_2: ; %bb1
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    v_mov_b32_e32 v2, 10
; GFX10-NEXT:    v_mov_b32_e32 v1, 0
; GFX10-NEXT:    global_store_dword v[0:1], v2, off
; GFX10-NEXT:    s_endpgm
bb:
  %i1 = add i32 %i, %i                 ; first add
  %i2 = icmp ult i32 %i1, %i           ; true if the first add wrapped (unsigned)
  %i3 = zext i1 %i2 to i32             ; overflow bit as 0 or 1
  %i4 = add nuw nsw i32 %i3, 0         ; adds zero; same value as %i3
  %i5 = add i32 %i4, %i                ; second add: overflow bit + input
  %i6 = icmp ult i32 %i5, %i4          ; true if the second add wrapped (unsigned)
  %i7 = select i1 %i6, i32 %i5, i32 0  ; has no users in this function; kept as written
  br i1 %i6, label %bb0, label %bb1    ; branch user of the second overflow bit

bb0:
  store volatile i32 9, i32 addrspace(1)* null
  br label %bb1

bb1:
  store volatile i32 10, i32 addrspace(1)* null
  ret void
}