; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC -check-prefix=VI %s
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

; Codegen checks for the llvm.ctpop intrinsic on i32 and on vectors of i32.
; Directives with the GCN prefix are shared by both amdgcn runs above, the SI
; and VI prefixes are specific to the -mcpu=SI and -mcpu=tonga runs, the EG
; prefix belongs to the r600/cypress run, and FUNC is enabled in all three.

declare i32 @llvm.ctpop.i32(i32) nounwind readnone
declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>) nounwind readnone
declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>) nounwind readnone
declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>) nounwind readnone
declare <16 x i32> @llvm.ctpop.v16i32(<16 x i32>) nounwind readnone

; ctpop of an i32 passed directly as a function argument. The amdgcn checks
; expect the value to arrive through s_load_dword, be counted with
; s_bcnt1_i32_b32, and then be copied into a v-register for the store.
; FUNC-LABEL: {{^}}s_ctpop_i32:
; GCN: s_load_dword [[SVAL:s[0-9]+]],
; GCN: s_bcnt1_i32_b32 [[SRESULT:s[0-9]+]], [[SVAL]]
; GCN: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
; GCN: buffer_store_dword [[VRESULT]],
; GCN: s_endpgm

; EG: BCNT_INT
define void @s_ctpop_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
  %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
  store i32 %ctpop, i32 addrspace(1)* %out, align 4
  ret void
}

; XXX - Why 0 in register?
; ctpop of an i32 loaded from memory. The amdgcn checks expect
; v_bcnt_u32_b32_e64 with a literal 0 as its last operand.
; FUNC-LABEL: {{^}}v_ctpop_i32:
; GCN: buffer_load_dword [[VAL:v[0-9]+]],
; GCN: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 0
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm

; EG: BCNT_INT
define void @v_ctpop_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
  %val = load i32, i32 addrspace(1)* %in, align 4
  %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
  store i32 %ctpop, i32 addrspace(1)* %out, align 4
  ret void
}

; Sum of two ctpops. The checks expect the first count to be passed as the
; last operand of the second v_bcnt; the encoding suffix of that second
; instruction (_e32 vs _e64) differs between the SI and tonga runs.
; FUNC-LABEL: {{^}}v_ctpop_add_chain_i32:
; GCN: buffer_load_dword [[VAL1:v[0-9]+]],
; GCN: buffer_load_dword [[VAL0:v[0-9]+]],
; GCN: v_bcnt_u32_b32_e64 [[MIDRESULT:v[0-9]+]], [[VAL1]], 0
; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]]
; VI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]]
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm

; EG: BCNT_INT
; EG: BCNT_INT
define void @v_ctpop_add_chain_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in0, i32 addrspace(1)* noalias %in1) nounwind {
  %val0 = load i32, i32 addrspace(1)* %in0, align 4
  %val1 = load i32, i32 addrspace(1)* %in1, align 4
  %ctpop0 = call i32 @llvm.ctpop.i32(i32 %val0) nounwind readnone
  %ctpop1 = call i32 @llvm.ctpop.i32(i32 %val1) nounwind readnone
  %add = add i32 %ctpop0, %ctpop1
  store i32 %add, i32 addrspace(1)* %out, align 4
  ret void
}

; ctpop plus an i32 function argument. The checks expect an s-register as the
; last operand of v_bcnt, on the output line directly after s_waitcnt.
; %in1 is not used by the body, and this function has no checks for the
; r600 run.
; FUNC-LABEL: {{^}}v_ctpop_add_sgpr_i32:
; GCN: buffer_load_dword [[VAL0:v[0-9]+]],
; GCN: s_waitcnt
; GCN-NEXT: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL0]], s{{[0-9]+}}
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm
define void @v_ctpop_add_sgpr_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in0, i32 addrspace(1)* noalias %in1, i32 %sval) nounwind {
  %val0 = load i32, i32 addrspace(1)* %in0, align 4
  %ctpop0 = call i32 @llvm.ctpop.i32(i32 %val0) nounwind readnone
  %add = add i32 %ctpop0, %sval
  store i32 %add, i32 addrspace(1)* %out, align 4
  ret void
}

; <2 x i32> ctpop: the checks expect one count instruction per element.
; FUNC-LABEL: {{^}}v_ctpop_v2i32:
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: s_endpgm

; EG: BCNT_INT
; EG: BCNT_INT
define void @v_ctpop_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %in) nounwind {
  %val = load <2 x i32>, <2 x i32> addrspace(1)* %in, align 8
  %ctpop = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %val) nounwind readnone
  store <2 x i32> %ctpop, <2 x i32> addrspace(1)* %out, align 8
  ret void
}

; <4 x i32> ctpop: the checks expect one count instruction per element.
; FUNC-LABEL: {{^}}v_ctpop_v4i32:
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: s_endpgm

; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
define void @v_ctpop_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %in) nounwind {
  %val = load <4 x i32>, <4 x i32> addrspace(1)* %in, align 16
  %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %val) nounwind readnone
  store <4 x i32> %ctpop, <4 x i32> addrspace(1)* %out, align 16
  ret void
}

; <8 x i32> ctpop: the checks expect one count instruction per element.
; FUNC-LABEL: {{^}}v_ctpop_v8i32:
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: s_endpgm

; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
define void @v_ctpop_v8i32(<8 x i32> addrspace(1)* noalias %out, <8 x i32> addrspace(1)* noalias %in) nounwind {
  %val = load <8 x i32>, <8 x i32> addrspace(1)* %in, align 32
  %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %val) nounwind readnone
  store <8 x i32> %ctpop, <8 x i32> addrspace(1)* %out, align 32
  ret void
}

; <16 x i32> ctpop: the checks expect one count instruction per element.
; FUNC-LABEL: {{^}}v_ctpop_v16i32:
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: v_bcnt_u32_b32_e64
; GCN: s_endpgm

; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
define void @v_ctpop_v16i32(<16 x i32> addrspace(1)* noalias %out, <16 x i32> addrspace(1)* noalias %in) nounwind {
  %val = load <16 x i32>, <16 x i32> addrspace(1)* %in, align 32
  %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %val) nounwind readnone
  store <16 x i32> %ctpop, <16 x i32> addrspace(1)* %out, align 32
  ret void
}

; ctpop plus the constant 4. The checks expect the 4 to appear directly as
; the last operand of v_bcnt.
; FUNC-LABEL: {{^}}v_ctpop_i32_add_inline_constant:
; GCN: buffer_load_dword [[VAL:v[0-9]+]],
; GCN: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm

; EG: BCNT_INT
define void @v_ctpop_i32_add_inline_constant(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
  %val = load i32, i32 addrspace(1)* %in, align 4
  %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
  %add = add i32 %ctpop, 4
  store i32 %add, i32 addrspace(1)* %out, align 4
  ret void
}

; Same as v_ctpop_i32_add_inline_constant but with the add operands commuted
; (constant first); the expected instructions are identical.
; FUNC-LABEL: {{^}}v_ctpop_i32_add_inline_constant_inv:
; GCN: buffer_load_dword [[VAL:v[0-9]+]],
; GCN: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm

; EG: BCNT_INT
define void @v_ctpop_i32_add_inline_constant_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
  %val = load i32, i32 addrspace(1)* %in, align 4
  %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
  %add = add i32 4, %ctpop
  store i32 %add, i32 addrspace(1)* %out, align 4
  ret void
}

; ctpop plus 99999 (0x1869f). The checks expect the constant to be moved into
; a v-register with v_mov_b32_e32 and that register to be the last operand of
; v_bcnt; the encoding suffix differs between the SI and tonga runs.
; This function has no checks for the r600 run.
; FUNC-LABEL: {{^}}v_ctpop_i32_add_literal:
; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]],
; GCN-DAG: v_mov_b32_e32 [[LIT:v[0-9]+]], 0x1869f
; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[LIT]]
; VI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[LIT]]
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm
define void @v_ctpop_i32_add_literal(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
  %val = load i32, i32 addrspace(1)* %in, align 4
  %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
  %add = add i32 %ctpop, 99999
  store i32 %add, i32 addrspace(1)* %out, align 4
  ret void
}

; ctpop plus an i32 function argument. The checks expect the s_load_dword
; result to be used directly as the last operand of v_bcnt.
; FUNC-LABEL: {{^}}v_ctpop_i32_add_var:
; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]],
; GCN-DAG: s_load_dword [[VAR:s[0-9]+]],
; GCN: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm

; EG: BCNT_INT
define void @v_ctpop_i32_add_var(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind {
  %val = load i32, i32 addrspace(1)* %in, align 4
  %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
  %add = add i32 %ctpop, %const
  store i32 %add, i32 addrspace(1)* %out, align 4
  ret void
}

; Same as v_ctpop_i32_add_var but with the add operands commuted (argument
; first); the expected instructions are identical.
; FUNC-LABEL: {{^}}v_ctpop_i32_add_var_inv:
; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]],
; GCN-DAG: s_load_dword [[VAR:s[0-9]+]],
; GCN: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm

; EG: BCNT_INT
define void @v_ctpop_i32_add_var_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind {
  %val = load i32, i32 addrspace(1)* %in, align 4
  %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
  %add = add i32 %const, %ctpop
  store i32 %add, i32 addrspace(1)* %out, align 4
  ret void
}

; The addend is itself loaded from memory, four i32 elements past %constptr
; (the checks expect that load to carry offset:16), and the add operands are
; commuted (loaded addend first). The encoding suffix of v_bcnt differs
; between the SI and tonga runs.
; FUNC-LABEL: {{^}}v_ctpop_i32_add_vvar_inv:
; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], {{0$}}
; GCN-DAG: buffer_load_dword [[VAR:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], 0 offset:16
; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
; VI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm

; EG: BCNT_INT
define void @v_ctpop_i32_add_vvar_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 addrspace(1)* noalias %constptr) nounwind {
  %val = load i32, i32 addrspace(1)* %in, align 4
  %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
  %gep = getelementptr i32, i32 addrspace(1)* %constptr, i32 4
  %const = load i32, i32 addrspace(1)* %gep, align 4
  %add = add i32 %const, %ctpop
  store i32 %add, i32 addrspace(1)* %out, align 4
  ret void
}

; FIXME: We currently disallow SALU instructions in all branches,
; but there are some cases when they should be allowed.
; ctpop of an i32 function argument computed in one arm of a conditional
; branch; the other arm loads a value from %in + 1 element, and a phi in
; %endif selects which of the two is stored to %out.
;
; The amdgcn checks expect the argument to be fetched with s_load_dword (the
; expected immediate offset differs between the SI and tonga runs), counted
; with s_bcnt1_i32_b32, and copied into a v-register that is then stored.
;
; RESULT is captured on the v_mov_b32_e32 line of this function. FileCheck
; variables are not reset at label boundaries unless --enable-var-scope is
; given, so a bare use here would silently bind to whatever register an
; earlier function's checks happened to capture.
; FUNC-LABEL: {{^}}ctpop_i32_in_br:
; SI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xd
; VI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x34
; GCN: s_bcnt1_i32_b32 [[SRESULT:s[0-9]+]], [[VAL]]
; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], [[SRESULT]]
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm
; EG: BCNT_INT
define void @ctpop_i32_in_br(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %ctpop_arg, i32 %cond) {
entry:
  %tmp0 = icmp eq i32 %cond, 0
  br i1 %tmp0, label %if, label %else

if:
  %tmp2 = call i32 @llvm.ctpop.i32(i32 %ctpop_arg)
  br label %endif

else:
  %tmp3 = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %tmp4 = load i32, i32 addrspace(1)* %tmp3
  br label %endif

endif:
  %tmp5 = phi i32 [%tmp2, %if], [%tmp4, %else]
  store i32 %tmp5, i32 addrspace(1)* %out
  ret void
}