• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2021 Valve Corporation
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * on the rights to use, copy, modify, merge, publish, distribute, sub
9  * license, and/or sell copies of the Software, and to permit persons to whom
10  * the Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22  * USE OR OTHER DEALINGS IN THE SOFTWARE.
23  */
24 
25 #include "ac_spm.h"
26 
27 #include "util/bitscan.h"
28 #include "util/u_memory.h"
29 #include "ac_perfcounter.h"
30 
31 static struct ac_spm_block_select *
ac_spm_get_block_select(struct ac_spm_trace_data * spm_trace,const struct ac_pc_block * block)32 ac_spm_get_block_select(struct ac_spm_trace_data *spm_trace,
33                         const struct ac_pc_block *block)
34 {
35    struct ac_spm_block_select *block_sel, *new_block_sel;
36    uint32_t num_block_sel;
37 
38    for (uint32_t i = 0; i < spm_trace->num_block_sel; i++) {
39       if (spm_trace->block_sel[i].b->b->b->gpu_block == block->b->b->gpu_block)
40          return &spm_trace->block_sel[i];
41    }
42 
43    /* Allocate a new select block if it doesn't already exist. */
44    num_block_sel = spm_trace->num_block_sel + 1;
45    block_sel = realloc(spm_trace->block_sel, num_block_sel * sizeof(*block_sel));
46    if (!block_sel)
47       return NULL;
48 
49    spm_trace->num_block_sel = num_block_sel;
50    spm_trace->block_sel = block_sel;
51 
52    /* Initialize the new select block. */
53    new_block_sel = &spm_trace->block_sel[spm_trace->num_block_sel - 1];
54    memset(new_block_sel, 0, sizeof(*new_block_sel));
55 
56    new_block_sel->b = block;
57    new_block_sel->num_counters = block->b->b->num_spm_counters;
58 
59    /* Broadcast global block writes to SEs and SAs */
60    if (!(block->b->b->flags & (AC_PC_BLOCK_SE | AC_PC_BLOCK_SHADER)))
61       new_block_sel->grbm_gfx_index = S_030800_SE_BROADCAST_WRITES(1) |
62                                       S_030800_SH_BROADCAST_WRITES(1);
63    /* Broadcast per SE block writes to SAs */
64    else if (block->b->b->flags & AC_PC_BLOCK_SE)
65       new_block_sel->grbm_gfx_index = S_030800_SH_BROADCAST_WRITES(1);
66 
67    return new_block_sel;
68 }
69 
70 static void
ac_spm_init_muxsel(const struct ac_pc_block * block,struct ac_spm_counter_info * counter,uint32_t spm_wire)71 ac_spm_init_muxsel(const struct ac_pc_block *block,
72                    struct ac_spm_counter_info *counter,
73                    uint32_t spm_wire)
74 {
75    struct ac_spm_muxsel *muxsel = &counter->muxsel;
76 
77    muxsel->counter = 2 * spm_wire + (counter->is_even ? 0 : 1);
78    muxsel->block = block->b->b->spm_block_select;
79    muxsel->shader_array = 0;
80    muxsel->instance = 0;
81 }
82 
83 static bool
ac_spm_map_counter(struct ac_spm_trace_data * spm_trace,struct ac_spm_block_select * block_sel,struct ac_spm_counter_info * counter,uint32_t * spm_wire)84 ac_spm_map_counter(struct ac_spm_trace_data *spm_trace,
85                    struct ac_spm_block_select *block_sel,
86                    struct ac_spm_counter_info *counter,
87                    uint32_t *spm_wire)
88 {
89    if (block_sel->b->b->b->gpu_block == SQ) {
90       for (unsigned i = 0; i < ARRAY_SIZE(spm_trace->sq_block_sel); i++) {
91          struct ac_spm_block_select *sq_block_sel = &spm_trace->sq_block_sel[i];
92          struct ac_spm_counter_select *cntr_sel = &sq_block_sel->counters[0];
93          if (i < spm_trace->num_used_sq_block_sel)
94             continue;
95 
96          /* SQ doesn't support 16-bit counters. */
97          cntr_sel->sel0 |= S_036700_PERF_SEL(counter->event_id) |
98                            S_036700_SPM_MODE(3) | /* 32-bit clamp */
99                            S_036700_PERF_MODE(0);
100          cntr_sel->active |= 0x3;
101 
102          /* 32-bits counter are always even. */
103          counter->is_even = true;
104 
105          /* One wire per SQ module. */
106          *spm_wire = i;
107 
108          spm_trace->num_used_sq_block_sel++;
109          return true;
110       }
111    } else {
112       /* Generic blocks. */
113       for (unsigned i = 0; i < block_sel->num_counters; i++) {
114          struct ac_spm_counter_select *cntr_sel = &block_sel->counters[i];
115          int index = ffs(~cntr_sel->active) - 1;
116 
117          switch (index) {
118          case 0: /* use S_037004_PERF_SEL */
119             cntr_sel->sel0 |= S_037004_PERF_SEL(counter->event_id) |
120                               S_037004_CNTR_MODE(1) | /* 16-bit clamp */
121                               S_037004_PERF_MODE(0); /* accum */
122             break;
123          case 1: /* use S_037004_PERF_SEL1 */
124             cntr_sel->sel0 |= S_037004_PERF_SEL1(counter->event_id) |
125                               S_037004_PERF_MODE1(0);
126             break;
127          case 2: /* use S_037004_PERF_SEL2 */
128             cntr_sel->sel1 |= S_037008_PERF_SEL2(counter->event_id) |
129                               S_037008_PERF_MODE2(0);
130             break;
131          case 3: /* use S_037004_PERF_SEL3 */
132             cntr_sel->sel1 |= S_037008_PERF_SEL3(counter->event_id) |
133                               S_037008_PERF_MODE3(0);
134             break;
135          default:
136             return false;
137          }
138 
139          /* Mark this 16-bit counter as used. */
140          cntr_sel->active |= 1 << index;
141 
142          /* Determine if the counter is even or odd. */
143          counter->is_even = !(index % 2);
144 
145          /* Determine the SPM wire (one wire holds two 16-bit counters). */
146          *spm_wire = !!(index >= 2);
147 
148          return true;
149       }
150    }
151 
152    return false;
153 }
154 
155 static bool
ac_spm_add_counter(const struct ac_perfcounters * pc,struct ac_spm_trace_data * spm_trace,const struct ac_spm_counter_create_info * info)156 ac_spm_add_counter(const struct ac_perfcounters *pc,
157                    struct ac_spm_trace_data *spm_trace,
158                    const struct ac_spm_counter_create_info *info)
159 {
160    struct ac_spm_counter_info *counter;
161    struct ac_spm_block_select *block_sel;
162    struct ac_pc_block *block;
163    uint32_t spm_wire;
164 
165    /* Check if the GPU block is valid. */
166    block = ac_pc_get_block(pc, info->gpu_block);
167    if (!block) {
168       fprintf(stderr, "ac/spm: Invalid GPU block.\n");
169       return false;
170    }
171 
172    /* Check if the number of instances is valid. */
173    if (info->instance > block->num_instances) {
174       fprintf(stderr, "ac/spm: Invalid instance ID.\n");
175       return false;
176    }
177 
178    /* Check if the event ID is valid. */
179    if (info->event_id > block->b->selectors) {
180       fprintf(stderr, "ac/spm: Invalid event ID.\n");
181       return false;
182    }
183 
184    counter = &spm_trace->counters[spm_trace->num_counters];
185    spm_trace->num_counters++;
186 
187    counter->gpu_block = info->gpu_block;
188    counter->instance = info->instance;
189    counter->event_id = info->event_id;
190 
191    /* Get the select block used to configure the counter. */
192    block_sel = ac_spm_get_block_select(spm_trace, block);
193    if (!block_sel)
194       return false;
195 
196    /* Map the counter to the select block. */
197    if (!ac_spm_map_counter(spm_trace, block_sel, counter, &spm_wire)) {
198       fprintf(stderr, "ac/spm: No free slots available!\n");
199       return false;
200    }
201 
202    /* Determine the counter segment type. */
203    if (block->b->b->flags & AC_PC_BLOCK_SE) {
204       counter->segment_type = AC_SPM_SEGMENT_TYPE_SE0; // XXX
205    } else {
206       counter->segment_type = AC_SPM_SEGMENT_TYPE_GLOBAL;
207    }
208 
209    /* Configure the muxsel for SPM. */
210    ac_spm_init_muxsel(block, counter, spm_wire);
211 
212    return true;
213 }
214 
/*
 * Builds the SPM trace configuration: registers every requested counter,
 * sizes the muxsel RAM for each segment (global + per-SE), then assigns each
 * counter a muxsel slot and its 16-bit-word offset within a sample.
 * Returns false on allocation failure or on an invalid counter description.
 * NOTE(review): error paths do not free previously allocated arrays;
 * presumably the caller runs ac_destroy_spm() on failure -- verify.
 */
ac_init_spm(const struct radeon_info * info,const struct ac_perfcounters * pc,unsigned num_counters,const struct ac_spm_counter_create_info * counters,struct ac_spm_trace_data * spm_trace)215 bool ac_init_spm(const struct radeon_info *info,
216                  const struct ac_perfcounters *pc,
217                  unsigned num_counters,
218                  const struct ac_spm_counter_create_info *counters,
219                  struct ac_spm_trace_data *spm_trace)
220 {
221    spm_trace->counters = CALLOC(num_counters, sizeof(*spm_trace->counters));
222    if (!spm_trace->counters)
223       return false;
224 
225    for (unsigned i = 0; i < num_counters; i++) {
226       if (!ac_spm_add_counter(pc, spm_trace, &counters[i])) {
227          fprintf(stderr, "ac/spm: Failed to add SPM counter (%d).\n", i);
228          return false;
229       }
230    }
231 
232    /* Determine the segment size and create a muxsel ram for every segment. */
233    for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_COUNT; s++) {
234       unsigned num_even_counters = 0, num_odd_counters = 0;
235 
236       if (s == AC_SPM_SEGMENT_TYPE_GLOBAL) {
237          /* The global segment always start with a 64-bit timestamp. */
238          num_even_counters += AC_SPM_GLOBAL_TIMESTAMP_COUNTERS;
239       }
240 
241       /* Count the number of even/odd counters for this segment. */
242       for (unsigned c = 0; c < spm_trace->num_counters; c++) {
243          struct ac_spm_counter_info *counter = &spm_trace->counters[c];
244 
245          if (counter->segment_type != s)
246             continue;
247 
248          if (counter->is_even) {
249             num_even_counters++;
250          } else {
251             num_odd_counters++;
252          }
253       }
254 
       /* Compute the number of lines. Even counters occupy lines 0,2,4,...
        * and odd counters lines 1,3,5,... (see the packing loop below), so
        * when there are more even lines the last odd line is omitted.
        */
255 
256       unsigned even_lines =
257          DIV_ROUND_UP(num_even_counters, AC_SPM_NUM_COUNTER_PER_MUXSEL);
258       unsigned odd_lines =
259          DIV_ROUND_UP(num_odd_counters, AC_SPM_NUM_COUNTER_PER_MUXSEL);
260       unsigned num_lines = (even_lines > odd_lines) ? (2 * even_lines - 1) : (2 * odd_lines);
261 
262       spm_trace->muxsel_lines[s] = CALLOC(num_lines, sizeof(*spm_trace->muxsel_lines[s]));
263       if (!spm_trace->muxsel_lines[s])
264          return false;
265       spm_trace->num_muxsel_lines[s] = num_lines;
266    }
267 
268    /* RLC uses the following order: Global, SE0, SE1, SE2, SE3. */
269    const enum ac_spm_segment_type ordered_segment[AC_SPM_SEGMENT_TYPE_COUNT] =
270    {
271       AC_SPM_SEGMENT_TYPE_GLOBAL,
272       AC_SPM_SEGMENT_TYPE_SE0,
273       AC_SPM_SEGMENT_TYPE_SE1,
274       AC_SPM_SEGMENT_TYPE_SE2,
275       AC_SPM_SEGMENT_TYPE_SE3,
276    };
277 
278    for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_COUNT; s++) {
279       if (!spm_trace->muxsel_lines[s])
280          continue;
281 
          /* Sum the counter slots of all preceding segments (in RLC order)
           * to get this segment's base offset into the sample.
           */
282       uint32_t segment_offset = 0;
283       for (unsigned i = 0; s != ordered_segment[i]; i++) {
284          segment_offset += spm_trace->num_muxsel_lines[ordered_segment[i]] *
285                            AC_SPM_NUM_COUNTER_PER_MUXSEL;
286       }
287 
          /* Even counters fill lines 0,2,4,...; odd counters lines 1,3,5,... */
288       uint32_t even_counter_idx = 0, even_line_idx = 0;
289       uint32_t odd_counter_idx = 0, odd_line_idx = 1;
290 
291       /* Add the global timestamps first. */
292       if (s == AC_SPM_SEGMENT_TYPE_GLOBAL) {
293          struct ac_spm_muxsel global_timestamp_muxsel = {
294             .counter = 0x30,
295             .block = 0x3,
296             .shader_array = 0,
297             .instance = 0x1e,
298          };
299 
             /* The 64-bit timestamp takes the first four 16-bit slots.
              * NOTE(review): assumes AC_SPM_GLOBAL_TIMESTAMP_COUNTERS == 4
              * to match the reservation above -- verify.
              */
300          for (unsigned i = 0; i < 4; i++) {
301             spm_trace->muxsel_lines[s][even_line_idx].muxsel[even_counter_idx++] = global_timestamp_muxsel;
302          }
303       }
304 
305       for (unsigned i = 0; i < spm_trace->num_counters; i++) {
306          struct ac_spm_counter_info *counter = &spm_trace->counters[i];
307 
308          if (counter->segment_type != s)
309             continue;
310 
311          if (counter->is_even) {
                /* Record where this counter's value lands in a sample. */
312             counter->offset = segment_offset + even_line_idx *
313                               AC_SPM_NUM_COUNTER_PER_MUXSEL + even_counter_idx;
314 
315             spm_trace->muxsel_lines[s][even_line_idx].muxsel[even_counter_idx] = spm_trace->counters[i].muxsel;
316             if (++even_counter_idx == AC_SPM_NUM_COUNTER_PER_MUXSEL) {
317                even_counter_idx = 0;
318                even_line_idx += 2;
319             }
320          } else {
321             counter->offset = segment_offset + odd_line_idx *
322                               AC_SPM_NUM_COUNTER_PER_MUXSEL + odd_counter_idx;
323 
324             spm_trace->muxsel_lines[s][odd_line_idx].muxsel[odd_counter_idx] = spm_trace->counters[i].muxsel;
325             if (++odd_counter_idx == AC_SPM_NUM_COUNTER_PER_MUXSEL) {
326                odd_counter_idx = 0;
327                odd_line_idx += 2;
328             }
329          }
330       }
331    }
332 
333    return true;
334 }
335 
ac_destroy_spm(struct ac_spm_trace_data * spm_trace)336 void ac_destroy_spm(struct ac_spm_trace_data *spm_trace)
337 {
338    for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_COUNT; s++) {
339       FREE(spm_trace->muxsel_lines[s]);
340    }
341    FREE(spm_trace->block_sel);
342    FREE(spm_trace->counters);
343 }
344 
ac_spm_get_sample_size(const struct ac_spm_trace_data * spm_trace)345 uint32_t ac_spm_get_sample_size(const struct ac_spm_trace_data *spm_trace)
346 {
347    uint32_t sample_size = 0; /* in bytes */
348 
349    for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_COUNT; s++) {
350       sample_size += spm_trace->num_muxsel_lines[s] * AC_SPM_MUXSEL_LINE_SIZE * 4;
351    }
352 
353    return sample_size;
354 }
355 
ac_spm_get_num_samples(const struct ac_spm_trace_data * spm_trace)356 uint32_t ac_spm_get_num_samples(const struct ac_spm_trace_data *spm_trace)
357 {
358    uint32_t sample_size = ac_spm_get_sample_size(spm_trace);
359    uint32_t *ptr = (uint32_t *)spm_trace->ptr;
360    uint32_t data_size, num_lines_written;
361    uint32_t num_samples = 0;
362 
363    /* Get the data size (in bytes) written by the hw to the ring buffer. */
364    data_size = ptr[0];
365 
366    /* Compute the number of 256 bits (16 * 16-bits counters) lines written. */
367    num_lines_written = data_size / (2 * AC_SPM_NUM_COUNTER_PER_MUXSEL);
368 
369    /* Check for overflow. */
370    if (num_lines_written % (sample_size / 32)) {
371       abort();
372    } else {
373       num_samples = num_lines_written / (sample_size / 32);
374    }
375 
376    return num_samples;
377 }
378