• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2016 Rob Clark <robclark@freedesktop.org>
3  * All Rights Reserved.
4  * SPDX-License-Identifier: MIT
5  */
6 
7 #include <assert.h>
8 #include <ctype.h>
9 #include <curses.h>
10 #include <err.h>
11 #include <inttypes.h>
12 #include <libconfig.h>
13 #include <locale.h>
14 #include <stdint.h>
15 #include <stdio.h>
16 #include <stdlib.h>
17 #include <string.h>
18 #include <time.h>
19 #include <unistd.h>
20 #include <xf86drm.h>
21 
22 #include "drm/freedreno_drmif.h"
23 #include "drm/freedreno_ringbuffer.h"
24 
25 #include "util/os_file.h"
26 
27 #include "freedreno_dt.h"
28 #include "freedreno_perfcntr.h"
29 
/* Capacity of the fixed-size per-counter arrays in struct counter_group. */
#define MAX_CNTR_PER_GROUP 24
/* Default value of options.refresh_ms, in milliseconds. */
#define REFRESH_MS         500
32 
/* Settings controlled from the command line; see parse_options(). */
static struct {
   /* sampling / redraw period in milliseconds */
   int refresh_ms;
   /* when set, main() calls dump_counters() instead of main_ui() */
   bool dump;
} options = {
   .dump = false,
   .refresh_ms = REFRESH_MS,
};
40 
41 /* NOTE first counter group should always be CP, since we unconditionally
42  * use CP counter to measure the gpu freq.
43  */
44 
45 struct counter_group {
46    const struct fd_perfcntr_group *group;
47 
48    struct {
49       const struct fd_perfcntr_counter *counter;
50       uint16_t select_val;
51       bool is_gpufreq_counter;
52    } counter[MAX_CNTR_PER_GROUP];
53 
54    /* name of currently selected counters (for UI): */
55    const char *label[MAX_CNTR_PER_GROUP];
56 
57    uint64_t value[MAX_CNTR_PER_GROUP];
58    uint64_t value_delta[MAX_CNTR_PER_GROUP];
59 
60    uint64_t sample_time[MAX_CNTR_PER_GROUP];
61    uint64_t sample_time_delta[MAX_CNTR_PER_GROUP];
62 };
63 
/* Global state describing the GPU being monitored. */
static struct {
   /* mapping returned by fd_dt_find_io(); counter values are read from it */
   void *io;
   /* frequency range in Hz (the footer divides by 1e6 to print MHz) */
   uint32_t min_freq;
   uint32_t max_freq;
   /* per-generation table of counters: */
   unsigned ngroups;
   struct counter_group *groups;
   /* drm device (for writing select regs via ring): */
   struct fd_device *dev;
   struct fd_pipe *pipe;
   const struct fd_dev_id *dev_id;
   /* pending submit and its ringbuffer; both NULL when nothing is queued */
   struct fd_submit *submit;
   struct fd_ringbuffer *ring;
} dev;
78 
/* Used by the UI code before their definitions further down the file. */
static void config_save(void);
static void config_restore(void);
static void restore_counter_groups(void);
82 
83 /*
84  * helpers
85  */
86 
/* Return the current CLOCK_MONOTONIC time in microseconds.
 *
 * The seconds field is widened to 64 bits before it is scaled, so the
 * multiplication cannot overflow on targets where time_t is 32 bits wide.
 */
static uint64_t
gettime_us(void)
{
   struct timespec ts;
   clock_gettime(CLOCK_MONOTONIC, &ts);
   return ((uint64_t)ts.tv_sec * 1000000u) + ((uint64_t)ts.tv_nsec / 1000u);
}
94 
/* Sleep for `us` microseconds, measured against CLOCK_MONOTONIC. The result
 * of clock_nanosleep() is not checked, so an interrupted sleep returns early.
 */
static void
sleep_us(uint32_t us)
{
   struct timespec req;

   req.tv_sec = us / 1000000;
   req.tv_nsec = (us % 1000000) * 1000;

   clock_nanosleep(CLOCK_MONOTONIC, 0, &req, NULL);
}
104 
/* Return how far a 64-bit value advanced from `a` (older) to `b` (newer).
 *
 * Unsigned subtraction is performed modulo 2^64, so a single rollover
 * between the two samples is handled without a special case: stepping from
 * UINT64_MAX to 0 yields 1.
 */
static uint64_t
delta(uint64_t a, uint64_t b)
{
   return b - a;
}
114 
115 static void
find_device(void)116 find_device(void)
117 {
118    int ret;
119 
120    dev.dev = fd_device_open();
121    if (!dev.dev)
122       err(1, "could not open drm device");
123 
124    dev.pipe = fd_pipe_new(dev.dev, FD_PIPE_3D);
125 
126    dev.dev_id = fd_pipe_dev_id(dev.pipe);
127    if (!fd_dev_info_raw(dev.dev_id))
128       err(1, "unknown device");
129 
130    printf("device: %s\n", fd_dev_name(dev.dev_id));
131 
132    /* try MAX_FREQ first as that will work regardless of old dt
133     * dt bindings vs upstream bindings:
134     */
135    uint64_t val;
136    ret = fd_pipe_get_param(dev.pipe, FD_MAX_FREQ, &val);
137    if (ret) {
138       printf("falling back to parsing DT bindings for freq\n");
139       if (!fd_dt_find_freqs(&dev.min_freq, &dev.max_freq))
140          err(1, "could not find GPU freqs");
141    } else {
142       dev.min_freq = 0;
143       dev.max_freq = val;
144    }
145 
146    printf("min_freq=%u, max_freq=%u\n", dev.min_freq, dev.max_freq);
147 
148    dev.io = fd_dt_find_io();
149    if (!dev.io) {
150       err(1, "could not map device");
151    }
152 
153    fd_pipe_set_param(dev.pipe, FD_SYSPROF, 1);
154 }
155 
156 /*
157  * perf-monitor
158  */
159 
160 static void
flush_ring(void)161 flush_ring(void)
162 {
163    if (!dev.submit)
164       return;
165 
166    struct fd_fence *fence = fd_submit_flush(dev.submit, -1, false);
167 
168    if (!fence)
169       errx(1, "submit failed");
170 
171    fd_fence_flush(fence);
172    fd_fence_del(fence);
173    fd_ringbuffer_del(dev.ring);
174    fd_submit_del(dev.submit);
175 
176    dev.ring = NULL;
177    dev.submit = NULL;
178 }
179 
180 static void
select_counter(struct counter_group * group,int ctr,int countable_val)181 select_counter(struct counter_group *group, int ctr, int countable_val)
182 {
183    assert(ctr < group->group->num_counters);
184 
185    unsigned countable_idx = UINT32_MAX;
186    for (unsigned i = 0; i < group->group->num_countables; i++) {
187       if (countable_val != group->group->countables[i].selector)
188          continue;
189 
190       countable_idx = i;
191       break;
192    }
193 
194    if (countable_idx >= group->group->num_countables)
195       return;
196 
197    group->label[ctr] = group->group->countables[countable_idx].name;
198    group->counter[ctr].select_val = countable_val;
199 
200    if (!dev.submit) {
201       dev.submit = fd_submit_new(dev.pipe);
202       dev.ring = fd_submit_new_ringbuffer(
203          dev.submit, 0x1000, FD_RINGBUFFER_PRIMARY | FD_RINGBUFFER_GROWABLE);
204    }
205 
206    /* bashing select register directly while gpu is active will end
207     * in tears.. so we need to write it via the ring:
208     *
209     * TODO it would help startup time, if gpu is loaded, to batch
210     * all the initial writes and do a single flush.. although that
211     * makes things more complicated for capturing inital sample value
212     */
213    struct fd_ringbuffer *ring = dev.ring;
214    switch (fd_dev_gen(dev.dev_id)) {
215    case 2:
216    case 3:
217    case 4:
218       OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
219       OUT_RING(ring, 0x00000000);
220 
221       if (group->group->counters[ctr].enable) {
222          OUT_PKT0(ring, group->group->counters[ctr].enable, 1);
223          OUT_RING(ring, 0);
224       }
225 
226       if (group->group->counters[ctr].clear) {
227          OUT_PKT0(ring, group->group->counters[ctr].clear, 1);
228          OUT_RING(ring, 1);
229 
230          OUT_PKT0(ring, group->group->counters[ctr].clear, 1);
231          OUT_RING(ring, 0);
232       }
233 
234       OUT_PKT0(ring, group->group->counters[ctr].select_reg, 1);
235       OUT_RING(ring, countable_val);
236 
237       if (group->group->counters[ctr].enable) {
238          OUT_PKT0(ring, group->group->counters[ctr].enable, 1);
239          OUT_RING(ring, 1);
240       }
241 
242       break;
243    case 5:
244    case 6:
245    case 7:
246       OUT_PKT7(ring, CP_WAIT_FOR_IDLE, 0);
247 
248       if (group->group->counters[ctr].enable) {
249          OUT_PKT4(ring, group->group->counters[ctr].enable, 1);
250          OUT_RING(ring, 0);
251       }
252 
253       if (group->group->counters[ctr].clear) {
254          OUT_PKT4(ring, group->group->counters[ctr].clear, 1);
255          OUT_RING(ring, 1);
256 
257          OUT_PKT4(ring, group->group->counters[ctr].clear, 1);
258          OUT_RING(ring, 0);
259       }
260 
261       OUT_PKT4(ring, group->group->counters[ctr].select_reg, 1);
262       OUT_RING(ring, countable_val);
263 
264       if (group->group->counters[ctr].enable) {
265          OUT_PKT4(ring, group->group->counters[ctr].enable, 1);
266          OUT_RING(ring, 1);
267       }
268 
269       break;
270    }
271 }
272 
load_counter_value(struct counter_group * group,int ctr)273 static uint64_t load_counter_value(struct counter_group *group, int ctr)
274 {
275    /* We can read the counter register value as an uint64_t, as long as the
276     * lo/hi addresses are neighboring and the lo address is 8-byte-aligned.
277     * This currently holds for all counters exposed in perfcounter groups.
278     */
279    const struct fd_perfcntr_counter *counter = group->counter[ctr].counter;
280    assert(counter->counter_reg_lo + 1 == counter->counter_reg_hi);
281    assert(!((counter->counter_reg_lo * 4) % 8));
282    return *((uint64_t *) (dev.io + counter->counter_reg_lo * 4));
283 }
284 
285 static void
resample_counter(struct counter_group * group,int ctr,uint64_t sample_time)286 resample_counter(struct counter_group *group, int ctr, uint64_t sample_time)
287 {
288    uint64_t previous_value = group->value[ctr];
289    group->value[ctr] = load_counter_value(group, ctr);
290    group->value_delta[ctr] = delta(previous_value, group->value[ctr]);
291 
292    uint64_t previous_sample_time = group->sample_time[ctr];
293    group->sample_time[ctr] = sample_time;
294    group->sample_time_delta[ctr] = delta(previous_sample_time, sample_time);
295 }
296 
297 /* sample all the counters: */
298 static void
resample(void)299 resample(void)
300 {
301    static uint64_t last_time;
302    uint64_t current_time = gettime_us();
303 
304    if ((current_time - last_time) < (options.refresh_ms * 1000 / 2))
305       return;
306 
307    last_time = current_time;
308 
309    for (unsigned i = 0; i < dev.ngroups; i++) {
310       struct counter_group *group = &dev.groups[i];
311       for (unsigned j = 0; j < group->group->num_counters; j++) {
312          resample_counter(group, j, current_time);
313       }
314    }
315 }
316 
317 /*
318  * The UI
319  */
320 
/* curses color pair ids; the pairs are defined in main_ui() */
#define COLOR_GROUP_HEADER 1
#define COLOR_FOOTER       2
#define COLOR_INVERSE      3

/* size of the main window, re-read at the start of every redraw() */
static int w, h;
/* width of the label column: length of the longest countable name, plus one */
static int ctr_width;
/* number of rows in the counter list (group headers plus counters), and the
 * row that is currently highlighted
 */
static int max_rows, current_cntr = 1;
328 
329 static void
redraw_footer(WINDOW * win)330 redraw_footer(WINDOW *win)
331 {
332    char footer[128];
333    int n = snprintf(footer, sizeof(footer), " fdperf: %s (%.2fMHz..%.2fMHz)",
334                     fd_dev_name(dev.dev_id), ((float)dev.min_freq) / 1000000.0,
335                     ((float)dev.max_freq) / 1000000.0);
336 
337    wmove(win, h - 1, 0);
338    wattron(win, COLOR_PAIR(COLOR_FOOTER));
339    waddstr(win, footer);
340    whline(win, ' ', w - n);
341    wattroff(win, COLOR_PAIR(COLOR_FOOTER));
342 }
343 
344 static void
redraw_group_header(WINDOW * win,int row,const char * name)345 redraw_group_header(WINDOW *win, int row, const char *name)
346 {
347    wmove(win, row, 0);
348    wattron(win, A_BOLD);
349    wattron(win, COLOR_PAIR(COLOR_GROUP_HEADER));
350    waddstr(win, name);
351    whline(win, ' ', w - strlen(name));
352    wattroff(win, COLOR_PAIR(COLOR_GROUP_HEADER));
353    wattroff(win, A_BOLD);
354 }
355 
356 static void
redraw_counter_label(WINDOW * win,int row,const char * name,bool selected)357 redraw_counter_label(WINDOW *win, int row, const char *name, bool selected)
358 {
359    int n = strlen(name);
360    assert(n <= ctr_width);
361    wmove(win, row, 0);
362    whline(win, ' ', ctr_width - n);
363    wmove(win, row, ctr_width - n);
364    if (selected)
365       wattron(win, COLOR_PAIR(COLOR_INVERSE));
366    waddstr(win, name);
367    if (selected)
368       wattroff(win, COLOR_PAIR(COLOR_INVERSE));
369    waddstr(win, ": ");
370 }
371 
372 static void
redraw_counter_value_cycles(WINDOW * win,float val)373 redraw_counter_value_cycles(WINDOW *win, float val)
374 {
375    char str[32];
376    int x = getcurx(win);
377    int valwidth = w - x;
378    int barwidth, n;
379 
380    /* convert to fraction of max freq: */
381    val = val / (float)dev.max_freq;
382 
383    /* figure out percentage-bar width: */
384    barwidth = (int)(val * valwidth);
385 
386    /* sometimes things go over 100%.. idk why, could be
387     * things running faster than base clock, or counter
388     * summing up cycles in multiple cores?
389     */
390    barwidth = MIN2(barwidth, valwidth - 1);
391 
392    n = snprintf(str, sizeof(str), "%.2f%%", 100.0 * val);
393    wattron(win, COLOR_PAIR(COLOR_INVERSE));
394    waddnstr(win, str, barwidth);
395    if (barwidth > n) {
396       whline(win, ' ', barwidth - n);
397       wmove(win, getcury(win), x + barwidth);
398    }
399    wattroff(win, COLOR_PAIR(COLOR_INVERSE));
400    if (barwidth < n)
401       waddstr(win, str + barwidth);
402    whline(win, ' ', w - getcurx(win));
403 }
404 
405 static void
redraw_counter_value(WINDOW * win,int row,struct counter_group * group,int ctr)406 redraw_counter_value(WINDOW *win, int row, struct counter_group *group, int ctr)
407 {
408    char str[32];
409    int n = snprintf(str, sizeof(str), "%" PRIu64 " ", group->value_delta[ctr]);
410 
411    whline(win, ' ', 24 - n);
412    wmove(win, row, getcurx(win) + 24 - n);
413    waddstr(win, str);
414 
415    /* quick hack, if the label has "CYCLE" in the name, it is
416     * probably a cycle counter ;-)
417     * Perhaps add more info in rnndb schema to know how to
418     * treat individual counters (ie. which are cycles, and
419     * for those we want to present as a percentage do we
420     * need to scale the result.. ie. is it running at some
421     * multiple or divisor of core clk, etc)
422     *
423     * TODO it would be much more clever to get this from xml
424     * Also.. in some cases I think we want to know how many
425     * units the counter is counting for, ie. if a320 has 2x
426     * shader as a306 we might need to scale the result..
427     */
428    if (strstr(group->label[ctr], "CYCLE") ||
429        strstr(group->label[ctr], "BUSY") || strstr(group->label[ctr], "IDLE")) {
430       float cycles_val = (float) group->value_delta[ctr] * 1000000.0 /
431                          (float) group->sample_time_delta[ctr];
432       redraw_counter_value_cycles(win, cycles_val);
433    } else {
434       whline(win, ' ', w - getcurx(win));
435    }
436 }
437 
438 static void
redraw_counter(WINDOW * win,int row,struct counter_group * group,int ctr,bool selected)439 redraw_counter(WINDOW *win, int row, struct counter_group *group, int ctr,
440                bool selected)
441 {
442    redraw_counter_label(win, row, group->label[ctr], selected);
443    redraw_counter_value(win, row, group, ctr);
444 }
445 
446 static void
redraw_gpufreq_counter(WINDOW * win,int row)447 redraw_gpufreq_counter(WINDOW *win, int row)
448 {
449    redraw_counter_label(win, row, "Freq (MHz)", false);
450 
451    struct counter_group *group = &dev.groups[0];
452    float freq_val = (float) group->value_delta[0] / (float) group->sample_time_delta[0];
453 
454    char str[32];
455    snprintf(str, sizeof(str), "%.2f", freq_val);
456 
457    waddstr(win, str);
458    whline(win, ' ', w - getcurx(win));
459 }
460 
461 static void
redraw(WINDOW * win)462 redraw(WINDOW *win)
463 {
464    static int scroll = 0;
465    int max, row = 0;
466 
467    w = getmaxx(win);
468    h = getmaxy(win);
469 
470    max = h - 3;
471 
472    if ((current_cntr - scroll) > (max - 1)) {
473       scroll = current_cntr - (max - 1);
474    } else if ((current_cntr - 1) < scroll) {
475       scroll = current_cntr - 1;
476    }
477 
478    for (unsigned i = 0; i < dev.ngroups; i++) {
479       struct counter_group *group = &dev.groups[i];
480       unsigned j = 0;
481 
482       if (group->counter[0].is_gpufreq_counter)
483          j++;
484 
485       if (j < group->group->num_counters) {
486          if ((scroll <= row) && ((row - scroll) < max))
487             redraw_group_header(win, row - scroll, group->group->name);
488          row++;
489       }
490 
491       for (; j < group->group->num_counters; j++) {
492          if ((scroll <= row) && ((row - scroll) < max))
493             redraw_counter(win, row - scroll, group, j, row == current_cntr);
494          row++;
495       }
496    }
497 
498    /* convert back to physical (unscrolled) offset: */
499    row = max;
500 
501    redraw_group_header(win, row, "Status");
502    row++;
503 
504    /* Draw GPU freq row: */
505    redraw_gpufreq_counter(win, row);
506    row++;
507 
508    redraw_footer(win);
509 
510    refresh();
511 }
512 
513 static struct counter_group *
current_counter(int * ctr)514 current_counter(int *ctr)
515 {
516    int n = 0;
517 
518    for (unsigned i = 0; i < dev.ngroups; i++) {
519       struct counter_group *group = &dev.groups[i];
520       unsigned j = 0;
521 
522       if (group->counter[0].is_gpufreq_counter)
523          j++;
524 
525       /* account for group header: */
526       if (j < group->group->num_counters) {
527          /* cannot select group header.. return null to indicate this
528           * main_ui():
529           */
530          if (n == current_cntr)
531             return NULL;
532          n++;
533       }
534 
535       for (; j < group->group->num_counters; j++) {
536          if (n == current_cntr) {
537             if (ctr)
538                *ctr = j;
539             return group;
540          }
541          n++;
542       }
543    }
544 
545    assert(0);
546    return NULL;
547 }
548 
549 static void
counter_dialog(void)550 counter_dialog(void)
551 {
552    WINDOW *dialog;
553    struct counter_group *group;
554    int cnt = 0, current = 0, scroll;
555 
556    /* figure out dialog size: */
557    int dh = h / 2;
558    int dw = ctr_width + 2;
559 
560    group = current_counter(&cnt);
561 
562    /* find currently selected idx (note there can be discontinuities
563     * so the selected value does not map 1:1 to current idx)
564     */
565    uint32_t selected = group->counter[cnt].select_val;
566    for (int i = 0; i < group->group->num_countables; i++) {
567       if (group->group->countables[i].selector == selected) {
568          current = i;
569          break;
570       }
571    }
572 
573    /* scrolling offset, if dialog is too small for all the choices: */
574    scroll = 0;
575 
576    dialog = newwin(dh, dw, (h - dh) / 2, (w - dw) / 2);
577    box(dialog, 0, 0);
578    wrefresh(dialog);
579    keypad(dialog, true);
580 
581    while (true) {
582       int max = MIN2(dh - 2, group->group->num_countables);
583       int selector = -1;
584 
585       if ((current - scroll) >= (dh - 3)) {
586          scroll = current - (dh - 3);
587       } else if (current < scroll) {
588          scroll = current;
589       }
590 
591       for (int i = 0; i < max; i++) {
592          int n = scroll + i;
593          wmove(dialog, i + 1, 1);
594          if (n == current) {
595             assert(n < group->group->num_countables);
596             selector = group->group->countables[n].selector;
597             wattron(dialog, COLOR_PAIR(COLOR_INVERSE));
598          }
599          if (n < group->group->num_countables)
600             waddstr(dialog, group->group->countables[n].name);
601          whline(dialog, ' ', dw - getcurx(dialog) - 1);
602          if (n == current)
603             wattroff(dialog, COLOR_PAIR(COLOR_INVERSE));
604       }
605 
606       assert(selector >= 0);
607 
608       switch (wgetch(dialog)) {
609       case KEY_UP:
610          current = MAX2(0, current - 1);
611          break;
612       case KEY_DOWN:
613          current = MIN2(group->group->num_countables - 1, current + 1);
614          break;
615       case KEY_LEFT:
616       case KEY_ENTER:
617          /* select new sampler */
618          select_counter(group, cnt, selector);
619          flush_ring();
620          config_save();
621          goto out;
622       case 'q':
623          goto out;
624       default:
625          /* ignore */
626          break;
627       }
628 
629       resample();
630    }
631 
632 out:
633    wborder(dialog, ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ');
634    delwin(dialog);
635 }
636 
637 static void
scroll_cntr(int amount)638 scroll_cntr(int amount)
639 {
640    if (amount < 0) {
641       current_cntr = MAX2(1, current_cntr + amount);
642       if (current_counter(NULL) == NULL) {
643          current_cntr = MAX2(1, current_cntr - 1);
644       }
645    } else {
646       current_cntr = MIN2(max_rows - 1, current_cntr + amount);
647       if (current_counter(NULL) == NULL)
648          current_cntr = MIN2(max_rows - 1, current_cntr + 1);
649    }
650 }
651 
652 static void
main_ui(void)653 main_ui(void)
654 {
655    WINDOW *mainwin;
656    uint64_t last_time = gettime_us();
657 
658    /* Run an initial sample to set up baseline counter values. */
659    resample();
660 
661    /* curses setup: */
662    mainwin = initscr();
663    if (!mainwin)
664       goto out;
665 
666    cbreak();
667    wtimeout(mainwin, options.refresh_ms);
668    noecho();
669    keypad(mainwin, true);
670    curs_set(0);
671    start_color();
672    init_pair(COLOR_GROUP_HEADER, COLOR_WHITE, COLOR_GREEN);
673    init_pair(COLOR_FOOTER, COLOR_WHITE, COLOR_BLUE);
674    init_pair(COLOR_INVERSE, COLOR_BLACK, COLOR_WHITE);
675 
676    while (true) {
677       switch (wgetch(mainwin)) {
678       case KEY_UP:
679          scroll_cntr(-1);
680          break;
681       case KEY_DOWN:
682          scroll_cntr(+1);
683          break;
684       case KEY_NPAGE: /* page-down */
685          /* TODO figure out # of rows visible? */
686          scroll_cntr(+15);
687          break;
688       case KEY_PPAGE: /* page-up */
689          /* TODO figure out # of rows visible? */
690          scroll_cntr(-15);
691          break;
692       case KEY_RIGHT:
693          counter_dialog();
694          break;
695       case 'q':
696          goto out;
697          break;
698       default:
699          /* ignore */
700          break;
701       }
702       resample();
703       redraw(mainwin);
704 
705       /* restore the counters every 0.5s in case the GPU has suspended,
706        * in which case the current selected countables will have reset:
707        */
708       uint64_t t = gettime_us();
709       if (delta(last_time, t) > 500000) {
710          restore_counter_groups();
711          flush_ring();
712          last_time = t;
713       }
714    }
715 
716    /* restore settings.. maybe we need an atexit()??*/
717 out:
718    delwin(mainwin);
719    endwin();
720    refresh();
721 }
722 
723 static void
dump_counters(void)724 dump_counters(void)
725 {
726    resample();
727    sleep_us(options.refresh_ms * 1000);
728    resample();
729 
730    for (unsigned i = 0; i < dev.ngroups; i++) {
731       const struct counter_group *group = &dev.groups[i];
732       for (unsigned j = 0; j < group->group->num_counters; j++) {
733          const char *label = group->label[j];
734          float val = (float) group->value_delta[j] * 1000000.0 /
735                      (float) group->sample_time_delta[j];
736 
737          int n = printf("%s: ", label) - 2;
738          while (n++ < ctr_width)
739             fputc(' ', stdout);
740 
741          n = printf("%" PRIu64, group->value_delta[j]);
742          while (n++ < 24)
743             fputc(' ', stdout);
744 
745          if (strstr(label, "CYCLE") ||
746              strstr(label, "BUSY") ||
747              strstr(label, "IDLE")) {
748             val = val / dev.max_freq * 100.0f;
749             printf(" %.2f%%\n", val);
750          } else {
751             printf("\n");
752          }
753       }
754    }
755 }
756 
757 static void
restore_counter_groups(void)758 restore_counter_groups(void)
759 {
760    for (unsigned i = 0; i < dev.ngroups; i++) {
761       struct counter_group *group = &dev.groups[i];
762 
763       for (unsigned j = 0; j < group->group->num_counters; j++) {
764          /* This should also write the CP_ALWAYS_COUNT selectable value into
765           * the reserved CP counter we use for GPU frequency measurement,
766           * avoiding someone else writing a different value there.
767           */
768          select_counter(group, j, group->counter[j].select_val);
769       }
770    }
771 }
772 
773 static void
setup_counter_groups(const struct fd_perfcntr_group * groups)774 setup_counter_groups(const struct fd_perfcntr_group *groups)
775 {
776    for (unsigned i = 0; i < dev.ngroups; i++) {
777       struct counter_group *group = &dev.groups[i];
778 
779       group->group = &groups[i];
780 
781       max_rows += group->group->num_counters + 1;
782 
783       /* We reserve the first counter of the CP group (first in the list) for
784        * measuring GPU frequency that's displayed in the footer.
785        */
786       if (i == 0) {
787          /* We won't be displaying the private counter alongside others. We
788           * also won't be displaying the group header if we're taking over
789           * the only counter (e.g. on a2xx).
790           */
791          max_rows--;
792          if (groups[0].num_counters < 2)
793             max_rows--;
794 
795          /* Enforce the CP_ALWAYS_COUNT countable for this counter. */
796          unsigned always_count_index = UINT32_MAX;
797          for (unsigned i = 0; i < groups[0].num_countables; ++i) {
798             if (strcmp(groups[0].countables[i].name, "PERF_CP_ALWAYS_COUNT"))
799                continue;
800 
801             always_count_index = i;
802             break;
803          }
804 
805          if (always_count_index < groups[0].num_countables) {
806             group->counter[0].select_val = groups[0].countables[always_count_index].selector;
807             group->counter[0].is_gpufreq_counter = true;
808          }
809       }
810 
811       for (unsigned j = 0; j < group->group->num_counters; j++) {
812          group->counter[j].counter = &group->group->counters[j];
813 
814          if (!group->counter[j].is_gpufreq_counter)
815             group->counter[j].select_val = j;
816       }
817 
818       for (unsigned j = 0; j < group->group->num_countables; j++) {
819          ctr_width =
820             MAX2(ctr_width, strlen(group->group->countables[j].name) + 1);
821       }
822    }
823 }
824 
825 /*
826  * configuration / persistence
827  */
828 
829 static config_t cfg;
830 static config_setting_t *setting;
831 
/* Rewrite `name` in place so that it is usable as a libconfig setting name. */
static void
config_sanitize_device_name(char *name)
{
   /* libconfig names allow alphanumeric characters, dashes, underscores and
    * asterisks. Anything else in the device name (most commonly spaces and
    * plus characters) should be converted to underscores.
    */
   for (char *s = name; *s; ++s) {
      /* <ctype.h> functions are only defined for unsigned char values (and
       * EOF), so a plain char must not be passed directly.
       */
      if (isalnum((unsigned char)*s) || *s == '-' || *s == '_' || *s == '*')
         continue;
      *s = '_';
   }
}
845 
846 static void
config_save(void)847 config_save(void)
848 {
849    for (unsigned i = 0; i < dev.ngroups; i++) {
850       struct counter_group *group = &dev.groups[i];
851       config_setting_t *sect =
852          config_setting_get_member(setting, group->group->name);
853 
854       for (unsigned j = 0; j < group->group->num_counters; j++) {
855          /* Don't save the GPU frequency measurement counter. */
856          if (group->counter[j].is_gpufreq_counter)
857             continue;
858 
859          char name[] = "counter0000";
860          sprintf(name, "counter%d", j);
861          config_setting_t *s = config_setting_lookup(sect, name);
862          config_setting_set_int(s, group->counter[j].select_val);
863       }
864    }
865 
866    config_write_file(&cfg, "fdperf.cfg");
867 }
868 
869 static void
config_restore(void)870 config_restore(void)
871 {
872    config_init(&cfg);
873 
874    /* Read the file. If there is an error, report it and exit. */
875    if (!config_read_file(&cfg, "fdperf.cfg")) {
876       warn("could not restore settings");
877    }
878 
879    config_setting_t *root = config_root_setting(&cfg);
880 
881    /* per device settings: */
882    char device_name[64];
883    snprintf(device_name, sizeof(device_name), "%s", fd_dev_name(dev.dev_id));
884    config_sanitize_device_name(device_name);
885    setting = config_setting_get_member(root, device_name);
886    if (!setting)
887       setting = config_setting_add(root, device_name, CONFIG_TYPE_GROUP);
888    if (!setting)
889       return;
890 
891    for (unsigned i = 0; i < dev.ngroups; i++) {
892       struct counter_group *group = &dev.groups[i];
893       config_setting_t *sect =
894          config_setting_get_member(setting, group->group->name);
895 
896       if (!sect) {
897          sect =
898             config_setting_add(setting, group->group->name, CONFIG_TYPE_GROUP);
899       }
900 
901       for (unsigned j = 0; j < group->group->num_counters; j++) {
902          /* Don't restore the GPU frequency measurement counter. */
903          if (group->counter[j].is_gpufreq_counter)
904             continue;
905 
906          char name[] = "counter0000";
907          sprintf(name, "counter%d", j);
908          config_setting_t *s = config_setting_lookup(sect, name);
909          if (!s) {
910             config_setting_add(sect, name, CONFIG_TYPE_INT);
911             continue;
912          }
913          select_counter(group, j, config_setting_get_int(s));
914       }
915    }
916 }
917 
/* Print the command-line help to stderr and exit with status 2. `argv0` is
 * the program name shown in the usage line.
 */
static void
print_usage(const char *argv0)
{
   static const char option_help[] =
      "  -r <N>     refresh every N milliseconds\n"
      "  -d         dump counters and exit\n"
      "  -h         show this message\n";

   fprintf(stderr, "Usage: %s [OPTION]...\n\n", argv0);
   fputs(option_help, stderr);

   exit(2);
}
930 
931 static void
parse_options(int argc,char ** argv)932 parse_options(int argc, char **argv)
933 {
934    int c;
935 
936    while ((c = getopt(argc, argv, "r:d")) != -1) {
937       switch (c) {
938       case 'r':
939          options.refresh_ms = atoi(optarg);
940          break;
941       case 'd':
942          options.dump = true;
943          break;
944       default:
945          print_usage(argv[0]);
946          break;
947       }
948    }
949 }
950 
951 /*
952  * main
953  */
954 
955 int
main(int argc,char ** argv)956 main(int argc, char **argv)
957 {
958    parse_options(argc, argv);
959 
960    find_device();
961 
962    const struct fd_perfcntr_group *groups;
963    groups = fd_perfcntrs(dev.dev_id, &dev.ngroups);
964    if (!groups) {
965       errx(1, "no perfcntr support");
966    }
967 
968    dev.groups = calloc(dev.ngroups, sizeof(struct counter_group));
969 
970    setlocale(LC_NUMERIC, "en_US.UTF-8");
971 
972    setup_counter_groups(groups);
973    restore_counter_groups();
974    config_restore();
975    flush_ring();
976 
977    if (options.dump)
978       dump_counters();
979    else
980       main_ui();
981 
982    return 0;
983 }
984