; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

declare i32 @llvm.ctpop.i32(i32) nounwind readnone
declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>) nounwind readnone
declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>) nounwind readnone
declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>) nounwind readnone
declare <16 x i32> @llvm.ctpop.v16i32(<16 x i32>) nounwind readnone

; Population count of an i32 kernel argument. The SI checks expect the
; argument to be fetched with a scalar load, counted with the scalar
; S_BCNT1_I32_B32, and then copied into a vector register for the store.
; FUNC-LABEL: @s_ctpop_i32:
; SI: S_LOAD_DWORD [[SARG:s[0-9]+]],
; SI: S_BCNT1_I32_B32 [[SCOUNT:s[0-9]+]], [[SARG]]
; SI: V_MOV_B32_e32 [[VCOUNT:v[0-9]+]], [[SCOUNT]]
; SI: BUFFER_STORE_DWORD [[VCOUNT]],
; SI: S_ENDPGM

; EG: BCNT_INT
define void @s_ctpop_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
  %popcnt = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
  store i32 %popcnt, i32 addrspace(1)* %out, align 4
  ret void
}

; XXX - Why 0 in register?
; Population count of an i32 loaded from global memory. The SI checks expect
; the vector V_BCNT_U32_B32 with a zero second operand held in a VGPR.
; FUNC-LABEL: @v_ctpop_i32:
; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
; SI: V_MOV_B32_e32 [[VZERO:v[0-9]+]], 0
; SI: V_BCNT_U32_B32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[VZERO]]
; SI: BUFFER_STORE_DWORD [[RESULT]],
; SI: S_ENDPGM

; EG: BCNT_INT
define void @v_ctpop_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
  %val = load i32 addrspace(1)* %in, align 4
  %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
  store i32 %ctpop, i32 addrspace(1)* %out, align 4
  ret void
}

; Sum of two population counts. The SI checks expect the first count to be
; fed in as the second operand of the second V_BCNT_U32_B32, with no separate
; ADD instruction in between.
; FUNC-LABEL: @v_ctpop_add_chain_i32:
; SI: BUFFER_LOAD_DWORD [[VAL0:v[0-9]+]],
; SI: BUFFER_LOAD_DWORD [[VAL1:v[0-9]+]],
; SI: V_MOV_B32_e32 [[VZERO:v[0-9]+]], 0
; SI: V_BCNT_U32_B32_e32 [[MIDRESULT:v[0-9]+]], [[VAL1]], [[VZERO]]
; SI-NOT: ADD
; SI: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]]
; SI: BUFFER_STORE_DWORD [[RESULT]],
; SI: S_ENDPGM

; EG: BCNT_INT
; EG: BCNT_INT
define void @v_ctpop_add_chain_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in0, i32 addrspace(1)* noalias %in1) nounwind {
  %val0 = load i32 addrspace(1)* %in0, align 4
  %val1 = load i32 addrspace(1)* %in1, align 4
  %ctpop0 = call i32 @llvm.ctpop.i32(i32 %val0) nounwind readnone
  %ctpop1 = call i32 @llvm.ctpop.i32(i32 %val1) nounwind readnone
  %add = add i32 %ctpop0, %ctpop1
  store i32 %add, i32 addrspace(1)* %out, align 4
  ret void
}

; <2 x i32> population count: one bit-count instruction expected per element.
; FUNC-LABEL: @v_ctpop_v2i32:
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: S_ENDPGM

; EG: BCNT_INT
; EG: BCNT_INT
define void @v_ctpop_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %in) nounwind {
  %val = load <2 x i32> addrspace(1)* %in, align 8
  %ctpop = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %val) nounwind readnone
  store <2 x i32> %ctpop, <2 x i32> addrspace(1)* %out, align 8
  ret void
}

; <4 x i32> population count: one bit-count instruction expected per element.
; FUNC-LABEL: @v_ctpop_v4i32:
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: S_ENDPGM

; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
define void @v_ctpop_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %in) nounwind {
  %val = load <4 x i32> addrspace(1)* %in, align 16
  %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %val) nounwind readnone
  store <4 x i32> %ctpop, <4 x i32> addrspace(1)* %out, align 16
  ret void
}

; <8 x i32> population count: one bit-count instruction expected per element.
; FUNC-LABEL: @v_ctpop_v8i32:
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: S_ENDPGM

; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
define void @v_ctpop_v8i32(<8 x i32> addrspace(1)* noalias %out, <8 x i32> addrspace(1)* noalias %in) nounwind {
  %val = load <8 x i32> addrspace(1)* %in, align 32
  %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %val) nounwind readnone
  store <8 x i32> %ctpop, <8 x i32> addrspace(1)* %out, align 32
  ret void
}

; <16 x i32> population count: one bit-count instruction expected per element.
; FUNC-LABEL: @v_ctpop_v16i32:
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
; SI: S_ENDPGM

; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
define void @v_ctpop_v16i32(<16 x i32> addrspace(1)* noalias %out, <16 x i32> addrspace(1)* noalias %in) nounwind {
  %val = load <16 x i32> addrspace(1)* %in, align 32
  %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %val) nounwind readnone
  store <16 x i32> %ctpop, <16 x i32> addrspace(1)* %out, align 32
  ret void
}

; ctpop + 4: the SI checks expect the constant to appear directly as the
; second operand of the e64 form of V_BCNT_U32_B32.
; FUNC-LABEL: @v_ctpop_i32_add_inline_constant:
; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
; SI: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4
; SI: BUFFER_STORE_DWORD [[RESULT]],
; SI: S_ENDPGM

; EG: BCNT_INT
define void @v_ctpop_i32_add_inline_constant(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
  %val = load i32 addrspace(1)* %in, align 4
  %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
  %add = add i32 %ctpop, 4
  store i32 %add, i32 addrspace(1)* %out, align 4
  ret void
}

; Same as above with the add operands swapped (4 + ctpop); the expected
; output is identical.
; FUNC-LABEL: @v_ctpop_i32_add_inline_constant_inv:
; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
; SI: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4
; SI: BUFFER_STORE_DWORD [[RESULT]],
; SI: S_ENDPGM

; EG: BCNT_INT
define void @v_ctpop_i32_add_inline_constant_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
  %val = load i32 addrspace(1)* %in, align 4
  %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
  %add = add i32 4, %ctpop
  store i32 %add, i32 addrspace(1)* %out, align 4
  ret void
}

; ctpop + 99999 (0x1869f): the SI checks expect the literal to be moved into
; a VGPR first and then used as the second operand of the e32 form.
; FUNC-LABEL: @v_ctpop_i32_add_literal:
; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
; SI: V_MOV_B32_e32 [[LIT:v[0-9]+]], 0x1869f
; SI: V_BCNT_U32_B32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[LIT]]
; SI: BUFFER_STORE_DWORD [[RESULT]],
; SI: S_ENDPGM

; EG: BCNT_INT
define void @v_ctpop_i32_add_literal(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
  %val = load i32 addrspace(1)* %in, align 4
  %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
  %add = add i32 %ctpop, 99999
  store i32 %add, i32 addrspace(1)* %out, align 4
  ret void
}

; ctpop + an i32 kernel argument: the SI checks expect the scalar-loaded
; argument to be used directly as the second operand of the e64 form.
; FUNC-LABEL: @v_ctpop_i32_add_var:
; SI-DAG: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
; SI-DAG: S_LOAD_DWORD [[VAR:s[0-9]+]],
; SI: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
; SI: BUFFER_STORE_DWORD [[RESULT]],
; SI: S_ENDPGM

; EG: BCNT_INT
define void @v_ctpop_i32_add_var(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind {
  %val = load i32 addrspace(1)* %in, align 4
  %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
  %add = add i32 %ctpop, %const
  store i32 %add, i32 addrspace(1)* %out, align 4
  ret void
}

; Same as above with the add operands swapped (%const + ctpop); the expected
; output is identical.
; FUNC-LABEL: @v_ctpop_i32_add_var_inv:
; SI-DAG: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
; SI-DAG: S_LOAD_DWORD [[VAR:s[0-9]+]],
; SI: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
; SI: BUFFER_STORE_DWORD [[RESULT]],
; SI: S_ENDPGM

; EG: BCNT_INT
define void @v_ctpop_i32_add_var_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind {
  %val = load i32 addrspace(1)* %in, align 4
  %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
  %add = add i32 %const, %ctpop
  store i32 %add, i32 addrspace(1)* %out, align 4
  ret void
}

; Value loaded from global memory + ctpop. The addend is read from element 4
; of %constptr (byte offset 0x10 in the checks), and the SI checks expect both
; loaded VGPRs to feed the e32 form of V_BCNT_U32_B32.
; FUNC-LABEL: @v_ctpop_i32_add_vvar_inv:
; SI-DAG: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]], {{.*}} + 0x0
; SI-DAG: BUFFER_LOAD_DWORD [[VAR:v[0-9]+]], {{.*}} + 0x10
; SI: V_BCNT_U32_B32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
; SI: BUFFER_STORE_DWORD [[RESULT]],
; SI: S_ENDPGM

; EG: BCNT_INT
define void @v_ctpop_i32_add_vvar_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 addrspace(1)* noalias %constptr) nounwind {
  %val = load i32 addrspace(1)* %in, align 4
  %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
  %gep = getelementptr i32 addrspace(1)* %constptr, i32 4
  %const = load i32 addrspace(1)* %gep, align 4
  %add = add i32 %const, %ctpop
  store i32 %add, i32 addrspace(1)* %out, align 4
  ret void
}

; FIXME: We currently disallow SALU instructions in all branches,
; but there are some cases when they should be allowed.

; ctpop computed in only one arm of an if/else whose results merge through a
; phi. The SI checks expect the vector form of the bit count with an inline
; zero second operand.
; FUNC-LABEL: @ctpop_i32_in_br:
; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
; SI: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL]], 0
; SI: BUFFER_STORE_DWORD [[RESULT]],
; SI: S_ENDPGM
; EG: BCNT_INT
define void @ctpop_i32_in_br(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %cond) {
entry:
  %0 = icmp eq i32 %cond, 0
  br i1 %0, label %if, label %else

if:
  %1 = load i32 addrspace(1)* %in
  %2 = call i32 @llvm.ctpop.i32(i32 %1)
  br label %endif

else:
  %3 = getelementptr i32 addrspace(1)* %in, i32 1
  %4 = load i32 addrspace(1)* %3
  br label %endif

endif:
  %5 = phi i32 [%2, %if], [%4, %else]
  store i32 %5, i32 addrspace(1)* %out
  ret void
}