1 /*
2 * Copyright © 2016 Rob Clark <robclark@freedesktop.org>
3 * All Rights Reserved.
4 * SPDX-License-Identifier: MIT
5 */
6
#include <assert.h>
#include <ctype.h>
#include <curses.h>
#include <err.h>
#include <inttypes.h>
#include <libconfig.h>
#include <limits.h>
#include <locale.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <xf86drm.h>
21
22 #include "drm/freedreno_drmif.h"
23 #include "drm/freedreno_ringbuffer.h"
24
25 #include "util/os_file.h"
26
27 #include "freedreno_dt.h"
28 #include "freedreno_perfcntr.h"
29
/* Capacity of the per-group counter arrays in struct counter_group. */
#define MAX_CNTR_PER_GROUP 24
/* Default refresh period, in milliseconds. */
#define REFRESH_MS 500

/* Settings filled in by parse_options(). */
static struct {
   int refresh_ms; /* -r: refresh/sampling period in milliseconds */
   bool dump;      /* -d: dump counters once and exit instead of running the UI */
} options = {
   .dump = false,
   .refresh_ms = REFRESH_MS,
};
40
/* NOTE: the first counter group must always be CP, since its first counter
 * is unconditionally used to measure the GPU frequency.
 */
44
45 struct counter_group {
46 const struct fd_perfcntr_group *group;
47
48 struct {
49 const struct fd_perfcntr_counter *counter;
50 uint16_t select_val;
51 bool is_gpufreq_counter;
52 } counter[MAX_CNTR_PER_GROUP];
53
54 /* name of currently selected counters (for UI): */
55 const char *label[MAX_CNTR_PER_GROUP];
56
57 uint64_t value[MAX_CNTR_PER_GROUP];
58 uint64_t value_delta[MAX_CNTR_PER_GROUP];
59
60 uint64_t sample_time[MAX_CNTR_PER_GROUP];
61 uint64_t sample_time_delta[MAX_CNTR_PER_GROUP];
62 };
63
/* Global device state, filled in by find_device() and main(). */
static struct {
   /* mapping returned by fd_dt_find_io(), used to read counter registers: */
   void *io;

   /* GPU frequency range (shown divided by 1e6 as MHz in the footer): */
   uint32_t min_freq;
   uint32_t max_freq;

   /* per-generation table of counters: */
   unsigned ngroups;
   struct counter_group *groups;

   /* drm device (for writing select regs via ring): */
   struct fd_device *dev;
   struct fd_pipe *pipe;
   const struct fd_dev_id *dev_id;

   /* pending submit and its ring; both NULL when nothing is queued: */
   struct fd_submit *submit;
   struct fd_ringbuffer *ring;
} dev;
78
79 static void config_save(void);
80 static void config_restore(void);
81 static void restore_counter_groups(void);
82
83 /*
84 * helpers
85 */
86
87 static uint64_t
gettime_us(void)88 gettime_us(void)
89 {
90 struct timespec ts;
91 clock_gettime(CLOCK_MONOTONIC, &ts);
92 return (ts.tv_sec * 1000000) + (ts.tv_nsec / 1000);
93 }
94
95 static void
sleep_us(uint32_t us)96 sleep_us(uint32_t us)
97 {
98 const struct timespec ts = {
99 .tv_sec = us / 1000000,
100 .tv_nsec = (us % 1000000) * 1000,
101 };
102 clock_nanosleep(CLOCK_MONOTONIC, 0, &ts, NULL);
103 }
104
/* Return how far a 64-bit counter advanced from `a` (earlier sample) to `b`
 * (later sample).
 *
 * Unsigned subtraction is performed modulo 2^64, so this is also correct when
 * the counter rolled over between the two samples (b < a); no special case is
 * needed.
 */
static uint64_t
delta(uint64_t a, uint64_t b)
{
   return b - a;
}
114
115 static void
find_device(void)116 find_device(void)
117 {
118 int ret;
119
120 dev.dev = fd_device_open();
121 if (!dev.dev)
122 err(1, "could not open drm device");
123
124 dev.pipe = fd_pipe_new(dev.dev, FD_PIPE_3D);
125
126 dev.dev_id = fd_pipe_dev_id(dev.pipe);
127 if (!fd_dev_info_raw(dev.dev_id))
128 err(1, "unknown device");
129
130 printf("device: %s\n", fd_dev_name(dev.dev_id));
131
132 /* try MAX_FREQ first as that will work regardless of old dt
133 * dt bindings vs upstream bindings:
134 */
135 uint64_t val;
136 ret = fd_pipe_get_param(dev.pipe, FD_MAX_FREQ, &val);
137 if (ret) {
138 printf("falling back to parsing DT bindings for freq\n");
139 if (!fd_dt_find_freqs(&dev.min_freq, &dev.max_freq))
140 err(1, "could not find GPU freqs");
141 } else {
142 dev.min_freq = 0;
143 dev.max_freq = val;
144 }
145
146 printf("min_freq=%u, max_freq=%u\n", dev.min_freq, dev.max_freq);
147
148 dev.io = fd_dt_find_io();
149 if (!dev.io) {
150 err(1, "could not map device");
151 }
152
153 fd_pipe_set_param(dev.pipe, FD_SYSPROF, 1);
154 }
155
156 /*
157 * perf-monitor
158 */
159
160 static void
flush_ring(void)161 flush_ring(void)
162 {
163 if (!dev.submit)
164 return;
165
166 struct fd_fence *fence = fd_submit_flush(dev.submit, -1, false);
167
168 if (!fence)
169 errx(1, "submit failed");
170
171 fd_fence_flush(fence);
172 fd_fence_del(fence);
173 fd_ringbuffer_del(dev.ring);
174 fd_submit_del(dev.submit);
175
176 dev.ring = NULL;
177 dev.submit = NULL;
178 }
179
180 static void
select_counter(struct counter_group * group,int ctr,int countable_val)181 select_counter(struct counter_group *group, int ctr, int countable_val)
182 {
183 assert(ctr < group->group->num_counters);
184
185 unsigned countable_idx = UINT32_MAX;
186 for (unsigned i = 0; i < group->group->num_countables; i++) {
187 if (countable_val != group->group->countables[i].selector)
188 continue;
189
190 countable_idx = i;
191 break;
192 }
193
194 if (countable_idx >= group->group->num_countables)
195 return;
196
197 group->label[ctr] = group->group->countables[countable_idx].name;
198 group->counter[ctr].select_val = countable_val;
199
200 if (!dev.submit) {
201 dev.submit = fd_submit_new(dev.pipe);
202 dev.ring = fd_submit_new_ringbuffer(
203 dev.submit, 0x1000, FD_RINGBUFFER_PRIMARY | FD_RINGBUFFER_GROWABLE);
204 }
205
206 /* bashing select register directly while gpu is active will end
207 * in tears.. so we need to write it via the ring:
208 *
209 * TODO it would help startup time, if gpu is loaded, to batch
210 * all the initial writes and do a single flush.. although that
211 * makes things more complicated for capturing inital sample value
212 */
213 struct fd_ringbuffer *ring = dev.ring;
214 switch (fd_dev_gen(dev.dev_id)) {
215 case 2:
216 case 3:
217 case 4:
218 OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
219 OUT_RING(ring, 0x00000000);
220
221 if (group->group->counters[ctr].enable) {
222 OUT_PKT0(ring, group->group->counters[ctr].enable, 1);
223 OUT_RING(ring, 0);
224 }
225
226 if (group->group->counters[ctr].clear) {
227 OUT_PKT0(ring, group->group->counters[ctr].clear, 1);
228 OUT_RING(ring, 1);
229
230 OUT_PKT0(ring, group->group->counters[ctr].clear, 1);
231 OUT_RING(ring, 0);
232 }
233
234 OUT_PKT0(ring, group->group->counters[ctr].select_reg, 1);
235 OUT_RING(ring, countable_val);
236
237 if (group->group->counters[ctr].enable) {
238 OUT_PKT0(ring, group->group->counters[ctr].enable, 1);
239 OUT_RING(ring, 1);
240 }
241
242 break;
243 case 5:
244 case 6:
245 case 7:
246 OUT_PKT7(ring, CP_WAIT_FOR_IDLE, 0);
247
248 if (group->group->counters[ctr].enable) {
249 OUT_PKT4(ring, group->group->counters[ctr].enable, 1);
250 OUT_RING(ring, 0);
251 }
252
253 if (group->group->counters[ctr].clear) {
254 OUT_PKT4(ring, group->group->counters[ctr].clear, 1);
255 OUT_RING(ring, 1);
256
257 OUT_PKT4(ring, group->group->counters[ctr].clear, 1);
258 OUT_RING(ring, 0);
259 }
260
261 OUT_PKT4(ring, group->group->counters[ctr].select_reg, 1);
262 OUT_RING(ring, countable_val);
263
264 if (group->group->counters[ctr].enable) {
265 OUT_PKT4(ring, group->group->counters[ctr].enable, 1);
266 OUT_RING(ring, 1);
267 }
268
269 break;
270 }
271 }
272
load_counter_value(struct counter_group * group,int ctr)273 static uint64_t load_counter_value(struct counter_group *group, int ctr)
274 {
275 /* We can read the counter register value as an uint64_t, as long as the
276 * lo/hi addresses are neighboring and the lo address is 8-byte-aligned.
277 * This currently holds for all counters exposed in perfcounter groups.
278 */
279 const struct fd_perfcntr_counter *counter = group->counter[ctr].counter;
280 assert(counter->counter_reg_lo + 1 == counter->counter_reg_hi);
281 assert(!((counter->counter_reg_lo * 4) % 8));
282 return *((uint64_t *) (dev.io + counter->counter_reg_lo * 4));
283 }
284
285 static void
resample_counter(struct counter_group * group,int ctr,uint64_t sample_time)286 resample_counter(struct counter_group *group, int ctr, uint64_t sample_time)
287 {
288 uint64_t previous_value = group->value[ctr];
289 group->value[ctr] = load_counter_value(group, ctr);
290 group->value_delta[ctr] = delta(previous_value, group->value[ctr]);
291
292 uint64_t previous_sample_time = group->sample_time[ctr];
293 group->sample_time[ctr] = sample_time;
294 group->sample_time_delta[ctr] = delta(previous_sample_time, sample_time);
295 }
296
297 /* sample all the counters: */
298 static void
resample(void)299 resample(void)
300 {
301 static uint64_t last_time;
302 uint64_t current_time = gettime_us();
303
304 if ((current_time - last_time) < (options.refresh_ms * 1000 / 2))
305 return;
306
307 last_time = current_time;
308
309 for (unsigned i = 0; i < dev.ngroups; i++) {
310 struct counter_group *group = &dev.groups[i];
311 for (unsigned j = 0; j < group->group->num_counters; j++) {
312 resample_counter(group, j, current_time);
313 }
314 }
315 }
316
317 /*
318 * The UI
319 */
320
/* curses color-pair ids, set up with init_pair() in main_ui(): */
#define COLOR_GROUP_HEADER 1
#define COLOR_FOOTER       2
#define COLOR_INVERSE      3

/* main window size (columns, rows), updated at the start of redraw(): */
static int w;
static int h;

/* width of the label column, derived from the longest countable name: */
static int ctr_width;

/* number of rows in the scrollable list (accumulated in
 * setup_counter_groups()) and the row that is currently highlighted:
 */
static int max_rows;
static int current_cntr = 1;
328
329 static void
redraw_footer(WINDOW * win)330 redraw_footer(WINDOW *win)
331 {
332 char footer[128];
333 int n = snprintf(footer, sizeof(footer), " fdperf: %s (%.2fMHz..%.2fMHz)",
334 fd_dev_name(dev.dev_id), ((float)dev.min_freq) / 1000000.0,
335 ((float)dev.max_freq) / 1000000.0);
336
337 wmove(win, h - 1, 0);
338 wattron(win, COLOR_PAIR(COLOR_FOOTER));
339 waddstr(win, footer);
340 whline(win, ' ', w - n);
341 wattroff(win, COLOR_PAIR(COLOR_FOOTER));
342 }
343
344 static void
redraw_group_header(WINDOW * win,int row,const char * name)345 redraw_group_header(WINDOW *win, int row, const char *name)
346 {
347 wmove(win, row, 0);
348 wattron(win, A_BOLD);
349 wattron(win, COLOR_PAIR(COLOR_GROUP_HEADER));
350 waddstr(win, name);
351 whline(win, ' ', w - strlen(name));
352 wattroff(win, COLOR_PAIR(COLOR_GROUP_HEADER));
353 wattroff(win, A_BOLD);
354 }
355
356 static void
redraw_counter_label(WINDOW * win,int row,const char * name,bool selected)357 redraw_counter_label(WINDOW *win, int row, const char *name, bool selected)
358 {
359 int n = strlen(name);
360 assert(n <= ctr_width);
361 wmove(win, row, 0);
362 whline(win, ' ', ctr_width - n);
363 wmove(win, row, ctr_width - n);
364 if (selected)
365 wattron(win, COLOR_PAIR(COLOR_INVERSE));
366 waddstr(win, name);
367 if (selected)
368 wattroff(win, COLOR_PAIR(COLOR_INVERSE));
369 waddstr(win, ": ");
370 }
371
372 static void
redraw_counter_value_cycles(WINDOW * win,float val)373 redraw_counter_value_cycles(WINDOW *win, float val)
374 {
375 char str[32];
376 int x = getcurx(win);
377 int valwidth = w - x;
378 int barwidth, n;
379
380 /* convert to fraction of max freq: */
381 val = val / (float)dev.max_freq;
382
383 /* figure out percentage-bar width: */
384 barwidth = (int)(val * valwidth);
385
386 /* sometimes things go over 100%.. idk why, could be
387 * things running faster than base clock, or counter
388 * summing up cycles in multiple cores?
389 */
390 barwidth = MIN2(barwidth, valwidth - 1);
391
392 n = snprintf(str, sizeof(str), "%.2f%%", 100.0 * val);
393 wattron(win, COLOR_PAIR(COLOR_INVERSE));
394 waddnstr(win, str, barwidth);
395 if (barwidth > n) {
396 whline(win, ' ', barwidth - n);
397 wmove(win, getcury(win), x + barwidth);
398 }
399 wattroff(win, COLOR_PAIR(COLOR_INVERSE));
400 if (barwidth < n)
401 waddstr(win, str + barwidth);
402 whline(win, ' ', w - getcurx(win));
403 }
404
405 static void
redraw_counter_value(WINDOW * win,int row,struct counter_group * group,int ctr)406 redraw_counter_value(WINDOW *win, int row, struct counter_group *group, int ctr)
407 {
408 char str[32];
409 int n = snprintf(str, sizeof(str), "%" PRIu64 " ", group->value_delta[ctr]);
410
411 whline(win, ' ', 24 - n);
412 wmove(win, row, getcurx(win) + 24 - n);
413 waddstr(win, str);
414
415 /* quick hack, if the label has "CYCLE" in the name, it is
416 * probably a cycle counter ;-)
417 * Perhaps add more info in rnndb schema to know how to
418 * treat individual counters (ie. which are cycles, and
419 * for those we want to present as a percentage do we
420 * need to scale the result.. ie. is it running at some
421 * multiple or divisor of core clk, etc)
422 *
423 * TODO it would be much more clever to get this from xml
424 * Also.. in some cases I think we want to know how many
425 * units the counter is counting for, ie. if a320 has 2x
426 * shader as a306 we might need to scale the result..
427 */
428 if (strstr(group->label[ctr], "CYCLE") ||
429 strstr(group->label[ctr], "BUSY") || strstr(group->label[ctr], "IDLE")) {
430 float cycles_val = (float) group->value_delta[ctr] * 1000000.0 /
431 (float) group->sample_time_delta[ctr];
432 redraw_counter_value_cycles(win, cycles_val);
433 } else {
434 whline(win, ' ', w - getcurx(win));
435 }
436 }
437
438 static void
redraw_counter(WINDOW * win,int row,struct counter_group * group,int ctr,bool selected)439 redraw_counter(WINDOW *win, int row, struct counter_group *group, int ctr,
440 bool selected)
441 {
442 redraw_counter_label(win, row, group->label[ctr], selected);
443 redraw_counter_value(win, row, group, ctr);
444 }
445
446 static void
redraw_gpufreq_counter(WINDOW * win,int row)447 redraw_gpufreq_counter(WINDOW *win, int row)
448 {
449 redraw_counter_label(win, row, "Freq (MHz)", false);
450
451 struct counter_group *group = &dev.groups[0];
452 float freq_val = (float) group->value_delta[0] / (float) group->sample_time_delta[0];
453
454 char str[32];
455 snprintf(str, sizeof(str), "%.2f", freq_val);
456
457 waddstr(win, str);
458 whline(win, ' ', w - getcurx(win));
459 }
460
461 static void
redraw(WINDOW * win)462 redraw(WINDOW *win)
463 {
464 static int scroll = 0;
465 int max, row = 0;
466
467 w = getmaxx(win);
468 h = getmaxy(win);
469
470 max = h - 3;
471
472 if ((current_cntr - scroll) > (max - 1)) {
473 scroll = current_cntr - (max - 1);
474 } else if ((current_cntr - 1) < scroll) {
475 scroll = current_cntr - 1;
476 }
477
478 for (unsigned i = 0; i < dev.ngroups; i++) {
479 struct counter_group *group = &dev.groups[i];
480 unsigned j = 0;
481
482 if (group->counter[0].is_gpufreq_counter)
483 j++;
484
485 if (j < group->group->num_counters) {
486 if ((scroll <= row) && ((row - scroll) < max))
487 redraw_group_header(win, row - scroll, group->group->name);
488 row++;
489 }
490
491 for (; j < group->group->num_counters; j++) {
492 if ((scroll <= row) && ((row - scroll) < max))
493 redraw_counter(win, row - scroll, group, j, row == current_cntr);
494 row++;
495 }
496 }
497
498 /* convert back to physical (unscrolled) offset: */
499 row = max;
500
501 redraw_group_header(win, row, "Status");
502 row++;
503
504 /* Draw GPU freq row: */
505 redraw_gpufreq_counter(win, row);
506 row++;
507
508 redraw_footer(win);
509
510 refresh();
511 }
512
513 static struct counter_group *
current_counter(int * ctr)514 current_counter(int *ctr)
515 {
516 int n = 0;
517
518 for (unsigned i = 0; i < dev.ngroups; i++) {
519 struct counter_group *group = &dev.groups[i];
520 unsigned j = 0;
521
522 if (group->counter[0].is_gpufreq_counter)
523 j++;
524
525 /* account for group header: */
526 if (j < group->group->num_counters) {
527 /* cannot select group header.. return null to indicate this
528 * main_ui():
529 */
530 if (n == current_cntr)
531 return NULL;
532 n++;
533 }
534
535 for (; j < group->group->num_counters; j++) {
536 if (n == current_cntr) {
537 if (ctr)
538 *ctr = j;
539 return group;
540 }
541 n++;
542 }
543 }
544
545 assert(0);
546 return NULL;
547 }
548
549 static void
counter_dialog(void)550 counter_dialog(void)
551 {
552 WINDOW *dialog;
553 struct counter_group *group;
554 int cnt = 0, current = 0, scroll;
555
556 /* figure out dialog size: */
557 int dh = h / 2;
558 int dw = ctr_width + 2;
559
560 group = current_counter(&cnt);
561
562 /* find currently selected idx (note there can be discontinuities
563 * so the selected value does not map 1:1 to current idx)
564 */
565 uint32_t selected = group->counter[cnt].select_val;
566 for (int i = 0; i < group->group->num_countables; i++) {
567 if (group->group->countables[i].selector == selected) {
568 current = i;
569 break;
570 }
571 }
572
573 /* scrolling offset, if dialog is too small for all the choices: */
574 scroll = 0;
575
576 dialog = newwin(dh, dw, (h - dh) / 2, (w - dw) / 2);
577 box(dialog, 0, 0);
578 wrefresh(dialog);
579 keypad(dialog, true);
580
581 while (true) {
582 int max = MIN2(dh - 2, group->group->num_countables);
583 int selector = -1;
584
585 if ((current - scroll) >= (dh - 3)) {
586 scroll = current - (dh - 3);
587 } else if (current < scroll) {
588 scroll = current;
589 }
590
591 for (int i = 0; i < max; i++) {
592 int n = scroll + i;
593 wmove(dialog, i + 1, 1);
594 if (n == current) {
595 assert(n < group->group->num_countables);
596 selector = group->group->countables[n].selector;
597 wattron(dialog, COLOR_PAIR(COLOR_INVERSE));
598 }
599 if (n < group->group->num_countables)
600 waddstr(dialog, group->group->countables[n].name);
601 whline(dialog, ' ', dw - getcurx(dialog) - 1);
602 if (n == current)
603 wattroff(dialog, COLOR_PAIR(COLOR_INVERSE));
604 }
605
606 assert(selector >= 0);
607
608 switch (wgetch(dialog)) {
609 case KEY_UP:
610 current = MAX2(0, current - 1);
611 break;
612 case KEY_DOWN:
613 current = MIN2(group->group->num_countables - 1, current + 1);
614 break;
615 case KEY_LEFT:
616 case KEY_ENTER:
617 /* select new sampler */
618 select_counter(group, cnt, selector);
619 flush_ring();
620 config_save();
621 goto out;
622 case 'q':
623 goto out;
624 default:
625 /* ignore */
626 break;
627 }
628
629 resample();
630 }
631
632 out:
633 wborder(dialog, ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ');
634 delwin(dialog);
635 }
636
637 static void
scroll_cntr(int amount)638 scroll_cntr(int amount)
639 {
640 if (amount < 0) {
641 current_cntr = MAX2(1, current_cntr + amount);
642 if (current_counter(NULL) == NULL) {
643 current_cntr = MAX2(1, current_cntr - 1);
644 }
645 } else {
646 current_cntr = MIN2(max_rows - 1, current_cntr + amount);
647 if (current_counter(NULL) == NULL)
648 current_cntr = MIN2(max_rows - 1, current_cntr + 1);
649 }
650 }
651
652 static void
main_ui(void)653 main_ui(void)
654 {
655 WINDOW *mainwin;
656 uint64_t last_time = gettime_us();
657
658 /* Run an initial sample to set up baseline counter values. */
659 resample();
660
661 /* curses setup: */
662 mainwin = initscr();
663 if (!mainwin)
664 goto out;
665
666 cbreak();
667 wtimeout(mainwin, options.refresh_ms);
668 noecho();
669 keypad(mainwin, true);
670 curs_set(0);
671 start_color();
672 init_pair(COLOR_GROUP_HEADER, COLOR_WHITE, COLOR_GREEN);
673 init_pair(COLOR_FOOTER, COLOR_WHITE, COLOR_BLUE);
674 init_pair(COLOR_INVERSE, COLOR_BLACK, COLOR_WHITE);
675
676 while (true) {
677 switch (wgetch(mainwin)) {
678 case KEY_UP:
679 scroll_cntr(-1);
680 break;
681 case KEY_DOWN:
682 scroll_cntr(+1);
683 break;
684 case KEY_NPAGE: /* page-down */
685 /* TODO figure out # of rows visible? */
686 scroll_cntr(+15);
687 break;
688 case KEY_PPAGE: /* page-up */
689 /* TODO figure out # of rows visible? */
690 scroll_cntr(-15);
691 break;
692 case KEY_RIGHT:
693 counter_dialog();
694 break;
695 case 'q':
696 goto out;
697 break;
698 default:
699 /* ignore */
700 break;
701 }
702 resample();
703 redraw(mainwin);
704
705 /* restore the counters every 0.5s in case the GPU has suspended,
706 * in which case the current selected countables will have reset:
707 */
708 uint64_t t = gettime_us();
709 if (delta(last_time, t) > 500000) {
710 restore_counter_groups();
711 flush_ring();
712 last_time = t;
713 }
714 }
715
716 /* restore settings.. maybe we need an atexit()??*/
717 out:
718 delwin(mainwin);
719 endwin();
720 refresh();
721 }
722
723 static void
dump_counters(void)724 dump_counters(void)
725 {
726 resample();
727 sleep_us(options.refresh_ms * 1000);
728 resample();
729
730 for (unsigned i = 0; i < dev.ngroups; i++) {
731 const struct counter_group *group = &dev.groups[i];
732 for (unsigned j = 0; j < group->group->num_counters; j++) {
733 const char *label = group->label[j];
734 float val = (float) group->value_delta[j] * 1000000.0 /
735 (float) group->sample_time_delta[j];
736
737 int n = printf("%s: ", label) - 2;
738 while (n++ < ctr_width)
739 fputc(' ', stdout);
740
741 n = printf("%" PRIu64, group->value_delta[j]);
742 while (n++ < 24)
743 fputc(' ', stdout);
744
745 if (strstr(label, "CYCLE") ||
746 strstr(label, "BUSY") ||
747 strstr(label, "IDLE")) {
748 val = val / dev.max_freq * 100.0f;
749 printf(" %.2f%%\n", val);
750 } else {
751 printf("\n");
752 }
753 }
754 }
755 }
756
757 static void
restore_counter_groups(void)758 restore_counter_groups(void)
759 {
760 for (unsigned i = 0; i < dev.ngroups; i++) {
761 struct counter_group *group = &dev.groups[i];
762
763 for (unsigned j = 0; j < group->group->num_counters; j++) {
764 /* This should also write the CP_ALWAYS_COUNT selectable value into
765 * the reserved CP counter we use for GPU frequency measurement,
766 * avoiding someone else writing a different value there.
767 */
768 select_counter(group, j, group->counter[j].select_val);
769 }
770 }
771 }
772
773 static void
setup_counter_groups(const struct fd_perfcntr_group * groups)774 setup_counter_groups(const struct fd_perfcntr_group *groups)
775 {
776 for (unsigned i = 0; i < dev.ngroups; i++) {
777 struct counter_group *group = &dev.groups[i];
778
779 group->group = &groups[i];
780
781 max_rows += group->group->num_counters + 1;
782
783 /* We reserve the first counter of the CP group (first in the list) for
784 * measuring GPU frequency that's displayed in the footer.
785 */
786 if (i == 0) {
787 /* We won't be displaying the private counter alongside others. We
788 * also won't be displaying the group header if we're taking over
789 * the only counter (e.g. on a2xx).
790 */
791 max_rows--;
792 if (groups[0].num_counters < 2)
793 max_rows--;
794
795 /* Enforce the CP_ALWAYS_COUNT countable for this counter. */
796 unsigned always_count_index = UINT32_MAX;
797 for (unsigned i = 0; i < groups[0].num_countables; ++i) {
798 if (strcmp(groups[0].countables[i].name, "PERF_CP_ALWAYS_COUNT"))
799 continue;
800
801 always_count_index = i;
802 break;
803 }
804
805 if (always_count_index < groups[0].num_countables) {
806 group->counter[0].select_val = groups[0].countables[always_count_index].selector;
807 group->counter[0].is_gpufreq_counter = true;
808 }
809 }
810
811 for (unsigned j = 0; j < group->group->num_counters; j++) {
812 group->counter[j].counter = &group->group->counters[j];
813
814 if (!group->counter[j].is_gpufreq_counter)
815 group->counter[j].select_val = j;
816 }
817
818 for (unsigned j = 0; j < group->group->num_countables; j++) {
819 ctr_width =
820 MAX2(ctr_width, strlen(group->group->countables[j].name) + 1);
821 }
822 }
823 }
824
825 /*
826 * configuration / persistence
827 */
828
829 static config_t cfg;
830 static config_setting_t *setting;
831
/* Rewrite `name` in place so that it can be used as a libconfig setting
 * name: every character that is not alphanumeric, '-', '_' or '*' is replaced
 * with '_'.
 */
static void
config_sanitize_device_name(char *name)
{
   /* libconfig names allow alphanumeric characters, dashes, underscores and
    * asterisks. Anything else in the device name (most commonly spaces and
    * plus characters) should be converted to underscores.
    */
   for (char *s = name; *s; ++s) {
      /* <ctype.h> functions are only defined for unsigned char values and
       * EOF, so a plain (possibly negative) char must be converted first:
       */
      if (isalnum((unsigned char)*s) || *s == '-' || *s == '_' || *s == '*')
         continue;
      *s = '_';
   }
}
845
846 static void
config_save(void)847 config_save(void)
848 {
849 for (unsigned i = 0; i < dev.ngroups; i++) {
850 struct counter_group *group = &dev.groups[i];
851 config_setting_t *sect =
852 config_setting_get_member(setting, group->group->name);
853
854 for (unsigned j = 0; j < group->group->num_counters; j++) {
855 /* Don't save the GPU frequency measurement counter. */
856 if (group->counter[j].is_gpufreq_counter)
857 continue;
858
859 char name[] = "counter0000";
860 sprintf(name, "counter%d", j);
861 config_setting_t *s = config_setting_lookup(sect, name);
862 config_setting_set_int(s, group->counter[j].select_val);
863 }
864 }
865
866 config_write_file(&cfg, "fdperf.cfg");
867 }
868
869 static void
config_restore(void)870 config_restore(void)
871 {
872 config_init(&cfg);
873
874 /* Read the file. If there is an error, report it and exit. */
875 if (!config_read_file(&cfg, "fdperf.cfg")) {
876 warn("could not restore settings");
877 }
878
879 config_setting_t *root = config_root_setting(&cfg);
880
881 /* per device settings: */
882 char device_name[64];
883 snprintf(device_name, sizeof(device_name), "%s", fd_dev_name(dev.dev_id));
884 config_sanitize_device_name(device_name);
885 setting = config_setting_get_member(root, device_name);
886 if (!setting)
887 setting = config_setting_add(root, device_name, CONFIG_TYPE_GROUP);
888 if (!setting)
889 return;
890
891 for (unsigned i = 0; i < dev.ngroups; i++) {
892 struct counter_group *group = &dev.groups[i];
893 config_setting_t *sect =
894 config_setting_get_member(setting, group->group->name);
895
896 if (!sect) {
897 sect =
898 config_setting_add(setting, group->group->name, CONFIG_TYPE_GROUP);
899 }
900
901 for (unsigned j = 0; j < group->group->num_counters; j++) {
902 /* Don't restore the GPU frequency measurement counter. */
903 if (group->counter[j].is_gpufreq_counter)
904 continue;
905
906 char name[] = "counter0000";
907 sprintf(name, "counter%d", j);
908 config_setting_t *s = config_setting_lookup(sect, name);
909 if (!s) {
910 config_setting_add(sect, name, CONFIG_TYPE_INT);
911 continue;
912 }
913 select_counter(group, j, config_setting_get_int(s));
914 }
915 }
916 }
917
/* Print the command line help to stderr and exit with status 2.  `argv0` is
 * the program name shown in the usage line.
 */
static void
print_usage(const char *argv0)
{
   fprintf(stderr, "Usage: %s [OPTION]...\n", argv0);
   fputs("\n"
         "  -r <N>     refresh every N milliseconds\n"
         "  -d         dump counters and exit\n"
         "  -h         show this message\n",
         stderr);

   exit(2);
}
930
931 static void
parse_options(int argc,char ** argv)932 parse_options(int argc, char **argv)
933 {
934 int c;
935
936 while ((c = getopt(argc, argv, "r:d")) != -1) {
937 switch (c) {
938 case 'r':
939 options.refresh_ms = atoi(optarg);
940 break;
941 case 'd':
942 options.dump = true;
943 break;
944 default:
945 print_usage(argv[0]);
946 break;
947 }
948 }
949 }
950
951 /*
952 * main
953 */
954
955 int
main(int argc,char ** argv)956 main(int argc, char **argv)
957 {
958 parse_options(argc, argv);
959
960 find_device();
961
962 const struct fd_perfcntr_group *groups;
963 groups = fd_perfcntrs(dev.dev_id, &dev.ngroups);
964 if (!groups) {
965 errx(1, "no perfcntr support");
966 }
967
968 dev.groups = calloc(dev.ngroups, sizeof(struct counter_group));
969
970 setlocale(LC_NUMERIC, "en_US.UTF-8");
971
972 setup_counter_groups(groups);
973 restore_counter_groups();
974 config_restore();
975 flush_ring();
976
977 if (options.dump)
978 dump_counters();
979 else
980 main_ui();
981
982 return 0;
983 }
984