aco_statistics.cpp - OpenGrok cross reference for /third_party/mesa3d/src/amd/compiler/aco

Lines Matching +full:vm +full:- +full:other
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
41    for (Block& block : program->blocks)  in collect_presched_stats()
43    program->statistics[statistic_sgpr_presched] = presched_demand.sgpr;  in collect_presched_stats()
44    program->statistics[statistic_vgpr_presched] = presched_demand.vgpr;  in collect_presched_stats()
71    std::deque<int32_t> vm;  member in aco::BlockCycleEstimator
76    void join(const BlockCycleEstimator& other);
88        : vm(vm_), exp(exp_), lgkm(lgkm_), vs(vs_)  in wait_counter_info()
91    unsigned vm;  member
110    instr_class cls = instr_info.classes[(int)instr->opcode];  in get_perf_info()
115    if (program->gfx_level >= GFX10) {  in get_perf_info()
138          return instr->ds().gds ? perf_info{0, WAIT_USE(export_gds, 1)}  in get_perf_info()
144       case instr_class::other:  in get_perf_info()
154          return program->dev.has_fast_fma32 ? perf_info{4, WAIT_USE(valu, 4)}  in get_perf_info()
167          return instr->ds().gds ? perf_info{4, WAIT_USE(export_gds, 4)}  in get_perf_info()
173       case instr_class::other:  in get_perf_info()
205       cost = MAX2(cost, res_available[(int)perf.rsrc0] - cur_cycle);  in cycles_until_res_available()
207       cost = MAX2(cost, res_available[(int)perf.rsrc1] - cur_cycle);  in cycles_until_res_available()
218    if (instr->isEXP())  in get_wait_counter_info()
221    if (instr->isFlatLike()) {  in get_wait_counter_info()
222       unsigned lgkm = instr->isFlat() ? 20 : 0;  in get_wait_counter_info()
223       if (!instr->definitions.empty())  in get_wait_counter_info()
229    if (instr->isSMEM()) {  in get_wait_counter_info()
230       if (instr->definitions.empty())  in get_wait_counter_info()
232       if (instr->operands.empty()) /* s_memtime and s_memrealtime */  in get_wait_counter_info()
235       bool likely_desc_load = instr->operands[0].size() == 2;  in get_wait_counter_info()
236       bool soe = instr->operands.size() >= (!instr->definitions.empty() ? 3 : 4);  in get_wait_counter_info()
238          instr->operands[1].isConstant() && (!soe || instr->operands.back().isConstant());  in get_wait_counter_info()
246    if (instr->format == Format::DS)  in get_wait_counter_info()
249    if (instr->isVMEM() && !instr->definitions.empty())  in get_wait_counter_info()
252    if (instr->isVMEM() && instr->definitions.empty())  in get_wait_counter_info()
261    if (instr->opcode == aco_opcode::s_endpgm) {  in get_wait_imm()
263    } else if (instr->opcode == aco_opcode::s_waitcnt) {  in get_wait_imm()
264       return wait_imm(GFX10_3, instr->sopp().imm);  in get_wait_imm()
265    } else if (instr->opcode == aco_opcode::s_waitcnt_vscnt) {  in get_wait_imm()
266       return wait_imm(0, 0, 0, instr->sopk().imm);  in get_wait_imm()
268       unsigned max_lgkm_cnt = program->gfx_level >= GFX10 ? 62 : 14;  in get_wait_imm()
270       unsigned max_vm_cnt = program->gfx_level >= GFX9 ? 62 : 14;  in get_wait_imm()
277       imm.vm = wait_info.vm ? max_vm_cnt : wait_imm::unset_counter;  in get_wait_imm()
289    if (imm.vm != wait_imm::unset_counter) {  in get_dependency_cost()
290       for (int i = 0; i < (int)vm.size() - imm.vm; i++)  in get_dependency_cost()
291          deps_available = MAX2(deps_available, vm[i]);  in get_dependency_cost()
294       for (int i = 0; i < (int)exp.size() - imm.exp; i++)  in get_dependency_cost()
298       for (int i = 0; i < (int)lgkm.size() - imm.lgkm; i++)  in get_dependency_cost()
302       for (int i = 0; i < (int)vs.size() - imm.vs; i++)  in get_dependency_cost()
306    if (instr->opcode == aco_opcode::s_endpgm) {  in get_dependency_cost()
309    } else if (program->gfx_level >= GFX10) {  in get_dependency_cost()
310       for (Operand& op : instr->operands) {  in get_dependency_cost()
318    if (program->gfx_level < GFX10)  in get_dependency_cost()
321    return deps_available - cur_cycle;  in get_dependency_cost()
328    return dep + std::max(cycles_until_res_available(instr) - dep, 0);  in predict_cost()
360    bool dual_issue = program->gfx_level >= GFX10 && program->wave_size == 64 &&  in add()
361                      is_vector(instr->opcode) && program->workgroup_size > 32;  in add()
368       /* GCN is in-order and doesn't begin the next instruction until the current one finishes */  in add()
369       cur_cycle += program->gfx_level >= GFX10 ? 1 : perf.latency;  in add()
377    while (vm.size() > imm.vm)  in add()
378       vm.pop_front();  in add()
387    if (wait_info.vm)  in add()
388       vm.push_back(cur_cycle + wait_info.vm);  in add()
395    int latency = MAX3(wait_info.exp, wait_info.lgkm, wait_info.vm);  in add()
398    for (Definition& def : instr->definitions) {  in add()
410    for (int i = pred.size() - queue.size() - 1; i >= 0; i--)  in join_queue()
421       res_available[i] = MAX2(res_available[i], pred.res_available[i] - pred.cur_cycle);  in join()
425       reg_available[i] = MAX2(reg_available[i], pred.reg_available[i] - pred.cur_cycle + cur_cycle);  in join()
427    join_queue(lgkm, pred.lgkm, -pred.cur_cycle);  in join()
428    join_queue(exp, pred.exp, -pred.cur_cycle);  in join()
429    join_queue(vm, pred.vm, -pred.cur_cycle);  in join()
430    join_queue(vs, pred.vs, -pred.cur_cycle);  in join()
437    for (Block& block : program->blocks) {  in collect_preasm_stats()
441       program->statistics[statistic_instructions] += block.instructions.size();  in collect_preasm_stats()
444          if (instr->isSOPP() && instr->sopp().block != -1)  in collect_preasm_stats()
445             program->statistics[statistic_branches]++;  in collect_preasm_stats()
447          if (instr->opcode == aco_opcode::p_constaddr)  in collect_preasm_stats()
448             program->statistics[statistic_instructions] += 2;  in collect_preasm_stats()
450          if ((instr->isVMEM() || instr->isScratch() || instr->isGlobal()) &&  in collect_preasm_stats()
451              !instr->operands.empty()) {  in collect_preasm_stats()
453                              [&](Instruction* other)  in collect_preasm_stats()
454                              { return should_form_clause(instr.get(), other); }))  in collect_preasm_stats()
455                program->statistics[statistic_vmem_clauses]++;  in collect_preasm_stats()
461          if (instr->isSMEM() && !instr->operands.empty()) {  in collect_preasm_stats()
463                              [&](Instruction* other)  in collect_preasm_stats()  argument
464                              { return should_form_clause(instr.get(), other); }))  in collect_preasm_stats()  argument
465                program->statistics[statistic_smem_clauses]++;  in collect_preasm_stats()
475    std::vector<BlockCycleEstimator> blocks(program->blocks.size(), program);  in collect_preasm_stats()
477    if (program->stage.has(SWStage::VS) && program->info.vs.has_prolog) {  in collect_preasm_stats()
479       for (Definition def : program->vs_inputs) {  in collect_preasm_stats()
480          blocks[0].vm.push_back(vs_input_latency);  in collect_preasm_stats()
486    for (Block& block : program->blocks) {  in collect_preasm_stats()
494          instr->pass_flags = block_est.cur_cycle - before;  in collect_preasm_stats()
505       /* Assume loops execute 8-2 times, uniform branches are taken 50% the time,  in collect_preasm_stats()
512       iter *= block.loop_nest_depth > 2 ? pow(2.0, block.loop_nest_depth - 2) : 1.0;  in collect_preasm_stats()
519          program->blocks[block.linear_preds[0]].kind & (block_kind_branch | block_kind_invert);  in collect_preasm_stats()
530     * work to from other waves while one is idle but that might not be the case  in collect_preasm_stats()
531     * because those other waves have not reached such a point yet.  in collect_preasm_stats()
534    double parallelism = program->num_waves;  in collect_preasm_stats()
540    double wave64_per_cycle = waves_per_cycle * (program->wave_size / 64.0);  in collect_preasm_stats()
543    if (program->workgroup_size != UINT_MAX)  in collect_preasm_stats()
545          program->workgroup_size / (double)align(program->workgroup_size, program->wave_size);  in collect_preasm_stats()
548    program->statistics[statistic_latency] = round(latency);  in collect_preasm_stats()
549    program->statistics[statistic_inv_throughput] = round(1.0 / wave64_per_cycle);  in collect_preasm_stats()
554       fprintf(stderr, "num_waves: %u\n", program->num_waves);  in collect_preasm_stats()
574    program->statistics[aco::statistic_hash] = util_hash_crc32(code.data(), code.size() * 4);  in collect_postasm_stats()