• Home
  • Raw
  • Download

Lines Matching +full:trace +full:- +full:mapping

4  * SPDX-License-Identifier: MIT
91 for (uint32_t i = 0; i < spm->num_block_sel; i++) { in ac_spm_get_block_select()
92 if (spm->block_sel[i].b->b->b->gpu_block == block->b->b->gpu_block) in ac_spm_get_block_select()
93 return &spm->block_sel[i]; in ac_spm_get_block_select()
97 num_block_sel = spm->num_block_sel + 1; in ac_spm_get_block_select()
98 block_sel = realloc(spm->block_sel, num_block_sel * sizeof(*block_sel)); in ac_spm_get_block_select()
102 spm->num_block_sel = num_block_sel; in ac_spm_get_block_select()
103 spm->block_sel = block_sel; in ac_spm_get_block_select()
106 new_block_sel = &spm->block_sel[spm->num_block_sel - 1]; in ac_spm_get_block_select()
109 new_block_sel->b = block; in ac_spm_get_block_select()
110 new_block_sel->instances = in ac_spm_get_block_select()
111 calloc(block->num_global_instances, sizeof(*new_block_sel->instances)); in ac_spm_get_block_select()
112 if (!new_block_sel->instances) in ac_spm_get_block_select()
114 new_block_sel->num_instances = block->num_global_instances; in ac_spm_get_block_select()
116 for (unsigned i = 0; i < new_block_sel->num_instances; i++) in ac_spm_get_block_select()
117 new_block_sel->instances[i].num_counters = block->b->b->num_spm_counters; in ac_spm_get_block_select()
124 uint32_t sa_index; /* SA index or 0 if global or per-SE */
132 struct ac_spm_instance_mapping *mapping) in ac_spm_init_instance_mapping() argument
136 if (block->b->b->flags & AC_PC_BLOCK_SE) { in ac_spm_init_instance_mapping()
137 if (block->b->b->gpu_block == SQ) { in ac_spm_init_instance_mapping()
138 /* Per-SE blocks. */ in ac_spm_init_instance_mapping()
139 se_index = counter->instance / block->num_instances; in ac_spm_init_instance_mapping()
140 instance_index = counter->instance % block->num_instances; in ac_spm_init_instance_mapping()
142 /* Per-SA blocks. */ in ac_spm_init_instance_mapping()
143 assert(block->b->b->gpu_block == GL1C || in ac_spm_init_instance_mapping()
144 block->b->b->gpu_block == TCP || in ac_spm_init_instance_mapping()
145 block->b->b->gpu_block == SQ_WGP); in ac_spm_init_instance_mapping()
146 se_index = (counter->instance / block->num_instances) / info->max_sa_per_se; in ac_spm_init_instance_mapping()
147 sa_index = (counter->instance / block->num_instances) % info->max_sa_per_se; in ac_spm_init_instance_mapping()
148 instance_index = counter->instance % block->num_instances; in ac_spm_init_instance_mapping()
152 assert(block->b->b->gpu_block == GL2C); in ac_spm_init_instance_mapping()
153 instance_index = counter->instance; in ac_spm_init_instance_mapping()
156 if (se_index >= info->num_se || in ac_spm_init_instance_mapping()
157 sa_index >= info->max_sa_per_se || in ac_spm_init_instance_mapping()
158 instance_index >= block->num_instances) in ac_spm_init_instance_mapping()
161 mapping->se_index = se_index; in ac_spm_init_instance_mapping()
162 mapping->sa_index = sa_index; in ac_spm_init_instance_mapping()
163 mapping->instance_index = instance_index; in ac_spm_init_instance_mapping()
171 const struct ac_spm_instance_mapping *mapping, in ac_spm_init_muxsel() argument
175 const uint16_t counter_idx = 2 * spm_wire + (counter->is_even ? 0 : 1); in ac_spm_init_muxsel()
176 union ac_spm_muxsel *muxsel = &counter->muxsel; in ac_spm_init_muxsel()
178 if (info->gfx_level >= GFX11) { in ac_spm_init_muxsel()
179 muxsel->gfx11.counter = counter_idx; in ac_spm_init_muxsel()
180 muxsel->gfx11.block = block->b->b->spm_block_select; in ac_spm_init_muxsel()
181 muxsel->gfx11.shader_array = mapping->sa_index; in ac_spm_init_muxsel()
182 muxsel->gfx11.instance = mapping->instance_index; in ac_spm_init_muxsel()
184 muxsel->gfx10.counter = counter_idx; in ac_spm_init_muxsel()
185 muxsel->gfx10.block = block->b->b->spm_block_select; in ac_spm_init_muxsel()
186 muxsel->gfx10.shader_array = mapping->sa_index; in ac_spm_init_muxsel()
187 muxsel->gfx10.instance = mapping->instance_index; in ac_spm_init_muxsel()
193 const struct ac_spm_instance_mapping *mapping) in ac_spm_init_grbm_gfx_index() argument
195 uint32_t instance = mapping->instance_index; in ac_spm_init_grbm_gfx_index()
198 grbm_gfx_index |= S_030800_SE_INDEX(mapping->se_index) | in ac_spm_init_grbm_gfx_index()
199 S_030800_SH_INDEX(mapping->sa_index); in ac_spm_init_grbm_gfx_index()
201 switch (block->b->b->gpu_block) { in ac_spm_init_grbm_gfx_index()
207 /* Per-SE blocks. */ in ac_spm_init_grbm_gfx_index()
215 if (block->b->b->gpu_block == SQ_WGP) { in ac_spm_init_grbm_gfx_index()
228 const bool is_below_spi = mapping->instance_index >= num_wgp_above_spi; in ac_spm_init_grbm_gfx_index()
231 is_below_spi ? (mapping->instance_index - num_wgp_above_spi) : mapping->instance_index; in ac_spm_init_grbm_gfx_index()
245 const struct ac_spm_instance_mapping *mapping, in ac_spm_map_counter() argument
248 uint32_t instance = counter->instance; in ac_spm_map_counter()
250 if (block_sel->b->b->b->gpu_block == SQ_WGP) { in ac_spm_map_counter()
251 if (!spm->sq_wgp[instance].grbm_gfx_index) { in ac_spm_map_counter()
252 spm->sq_wgp[instance].grbm_gfx_index = in ac_spm_map_counter()
253 ac_spm_init_grbm_gfx_index(block_sel->b, mapping); in ac_spm_map_counter()
256 for (unsigned i = 0; i < ARRAY_SIZE(spm->sq_wgp[instance].counters); i++) { in ac_spm_map_counter()
257 struct ac_spm_counter_select *cntr_sel = &spm->sq_wgp[instance].counters[i]; in ac_spm_map_counter()
259 if (i < spm->sq_wgp[instance].num_counters) in ac_spm_map_counter()
262 cntr_sel->sel0 |= S_036700_PERF_SEL(counter->event_id) | in ac_spm_map_counter()
263 S_036700_SPM_MODE(1) | /* 16-bit clamp */ in ac_spm_map_counter()
266 /* Each SQ_WGP module (GFX11+) shares one 32-bit accumulator/wire in ac_spm_map_counter()
269 cntr_sel->active |= 1 << (i % 2); in ac_spm_map_counter()
272 if (cntr_sel->active & 0x1) in ac_spm_map_counter()
273 counter->is_even = true; in ac_spm_map_counter()
275 spm->sq_wgp[instance].num_counters++; in ac_spm_map_counter()
278 } else if (block_sel->b->b->b->gpu_block == SQ) { in ac_spm_map_counter()
279 for (unsigned i = 0; i < ARRAY_SIZE(spm->sqg[instance].counters); i++) { in ac_spm_map_counter()
280 struct ac_spm_counter_select *cntr_sel = &spm->sqg[instance].counters[i]; in ac_spm_map_counter()
282 if (i < spm->sqg[instance].num_counters) in ac_spm_map_counter()
285 /* SQ doesn't support 16-bit counters. */ in ac_spm_map_counter()
286 cntr_sel->sel0 |= S_036700_PERF_SEL(counter->event_id) | in ac_spm_map_counter()
287 S_036700_SPM_MODE(3) | /* 32-bit clamp */ in ac_spm_map_counter()
289 cntr_sel->active |= 0x3; in ac_spm_map_counter()
291 /* 32-bit counters are always even. */ in ac_spm_map_counter()
292 counter->is_even = true; in ac_spm_map_counter()
297 spm->sqg[instance].num_counters++; in ac_spm_map_counter()
303 &block_sel->instances[instance]; in ac_spm_map_counter()
305 if (!block_instance->grbm_gfx_index) { in ac_spm_map_counter()
306 block_instance->grbm_gfx_index = in ac_spm_map_counter()
307 ac_spm_init_grbm_gfx_index(block_sel->b, mapping); in ac_spm_map_counter()
310 for (unsigned i = 0; i < block_instance->num_counters; i++) { in ac_spm_map_counter()
311 struct ac_spm_counter_select *cntr_sel = &block_instance->counters[i]; in ac_spm_map_counter()
312 int index = ffs(~cntr_sel->active) - 1; in ac_spm_map_counter()
316 cntr_sel->sel0 |= S_037004_PERF_SEL(counter->event_id) | in ac_spm_map_counter()
317 S_037004_CNTR_MODE(1) | /* 16-bit clamp */ in ac_spm_map_counter()
321 cntr_sel->sel0 |= S_037004_PERF_SEL1(counter->event_id) | in ac_spm_map_counter()
325 cntr_sel->sel1 |= S_037008_PERF_SEL2(counter->event_id) | in ac_spm_map_counter()
329 cntr_sel->sel1 |= S_037008_PERF_SEL3(counter->event_id) | in ac_spm_map_counter()
336 /* Mark this 16-bit counter as used. */ in ac_spm_map_counter()
337 cntr_sel->active |= 1 << index; in ac_spm_map_counter()
340 counter->is_even = !(index % 2); in ac_spm_map_counter()
342 /* Determine the SPM wire (one wire holds two 16-bit counters). */ in ac_spm_map_counter()
365 block = ac_pc_get_block(pc, counter_info->b->gpu_block); in ac_spm_add_counter()
372 if (counter_info->instance > block->num_global_instances - 1) { in ac_spm_add_counter()
378 if (counter_info->b->event_id > block->b->selectors) { in ac_spm_add_counter()
383 counter = &spm->counters[spm->num_counters]; in ac_spm_add_counter()
384 spm->num_counters++; in ac_spm_add_counter()
386 counter->gpu_block = counter_info->b->gpu_block; in ac_spm_add_counter()
387 counter->event_id = counter_info->b->event_id; in ac_spm_add_counter()
388 counter->instance = counter_info->instance; in ac_spm_add_counter()
395 /* Initialize instance mapping for the counter. */ in ac_spm_add_counter()
397 fprintf(stderr, "ac/spm: Failed to initialize instance mapping.\n"); in ac_spm_add_counter()
408 if (block->b->b->flags & AC_PC_BLOCK_SE) { in ac_spm_add_counter()
409 counter->segment_type = instance_mapping.se_index; in ac_spm_add_counter()
411 counter->segment_type = AC_SPM_SEGMENT_TYPE_GLOBAL; in ac_spm_add_counter()
426 struct ac_spm_muxsel_line *mappings = spm->muxsel_lines[segment_type]; in ac_spm_fill_muxsel_ram()
432 if (info->gfx_level >= GFX11) { in ac_spm_fill_muxsel_ram()
444 for (unsigned i = 0; i < spm->num_counters; i++) { in ac_spm_fill_muxsel_ram()
445 struct ac_spm_counter_info *counter = &spm->counters[i]; in ac_spm_fill_muxsel_ram()
447 if (counter->segment_type != segment_type) in ac_spm_fill_muxsel_ram()
450 if (counter->is_even) { in ac_spm_fill_muxsel_ram()
451 counter->offset = in ac_spm_fill_muxsel_ram()
454 mappings[even_line_idx].muxsel[even_counter_idx] = spm->counters[i].muxsel; in ac_spm_fill_muxsel_ram()
460 counter->offset = in ac_spm_fill_muxsel_ram()
463 mappings[odd_line_idx].muxsel[odd_counter_idx] = spm->counters[i].muxsel; in ac_spm_fill_muxsel_ram()
480 switch (info->gfx_level) { in ac_init_spm()
500 const struct ac_pc_block *block = ac_pc_get_block(pc, create_info[i].b->gpu_block); in ac_init_spm()
505 num_counters += block->num_global_instances; in ac_init_spm()
508 spm->counters = CALLOC(num_counters, sizeof(*spm->counters)); in ac_init_spm()
509 if (!spm->counters) in ac_init_spm()
513 const struct ac_pc_block *block = ac_pc_get_block(pc, create_info[i].b->gpu_block); in ac_init_spm()
516 for (unsigned j = 0; j < block->num_global_instances; j++) { in ac_init_spm()
531 /* The global segment always starts with a 64-bit timestamp. */ in ac_init_spm()
536 for (unsigned c = 0; c < spm->num_counters; c++) { in ac_init_spm()
537 struct ac_spm_counter_info *counter = &spm->counters[c]; in ac_init_spm()
539 if (counter->segment_type != s) in ac_init_spm()
542 if (counter->is_even) { in ac_init_spm()
554 unsigned num_lines = (even_lines > odd_lines) ? (2 * even_lines - 1) : (2 * odd_lines); in ac_init_spm()
556 spm->muxsel_lines[s] = CALLOC(num_lines, sizeof(*spm->muxsel_lines[s])); in ac_init_spm()
557 if (!spm->muxsel_lines[s]) in ac_init_spm()
559 spm->num_muxsel_lines[s] = num_lines; in ac_init_spm()
566 spm->max_se_muxsel_lines = in ac_init_spm()
567 MAX2(spm->num_muxsel_lines[s], spm->max_se_muxsel_lines); in ac_init_spm()
573 const uint32_t num_global_lines = spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_GLOBAL]; in ac_init_spm()
575 if (info->gfx_level >= GFX11) { in ac_init_spm()
577 for (unsigned i = 0; i < info->num_se; i++) { in ac_init_spm()
579 uint32_t offset = num_global_lines + i * spm->max_se_muxsel_lines; in ac_init_spm()
586 for (unsigned i = 0; i < info->num_se; i++) { in ac_init_spm()
591 offset += spm->num_muxsel_lines[i]; in ac_init_spm()
596 spm->ptr_granularity = info->gfx_level >= GFX11 ? 32 : 1; in ac_init_spm()
604 FREE(spm->muxsel_lines[s]); in ac_destroy_spm()
607 for (unsigned i = 0; i < spm->num_block_sel; i++) { in ac_destroy_spm()
608 FREE(spm->block_sel[i].instances); in ac_destroy_spm()
611 FREE(spm->block_sel); in ac_destroy_spm()
612 FREE(spm->counters); in ac_destroy_spm()
620 sample_size += spm->num_muxsel_lines[s] * AC_SPM_MUXSEL_LINE_SIZE * 4; in ac_spm_get_sample_size()
629 uint32_t *ptr = (uint32_t *)spm->ptr; in ac_spm_get_num_samples()
634 data_size = ptr[0] * spm->ptr_granularity; in ac_spm_get_num_samples()
636 /* Compute the number of 256-bit lines (16 * 16-bit counters) written. */ in ac_spm_get_num_samples()
649 void ac_spm_get_trace(const struct ac_spm *spm, struct ac_spm_trace *trace) in ac_spm_get_trace() argument
651 memset(trace, 0, sizeof(*trace)); in ac_spm_get_trace()
653 trace->ptr = spm->ptr; in ac_spm_get_trace()
654 trace->sample_interval = spm->sample_interval; in ac_spm_get_trace()
655 trace->num_counters = spm->num_counters; in ac_spm_get_trace()
656 trace->counters = spm->counters; in ac_spm_get_trace()
657 trace->sample_size_in_bytes = ac_spm_get_sample_size(spm); in ac_spm_get_trace()
658 trace->num_samples = ac_spm_get_num_samples(spm); in ac_spm_get_trace()