1 /*
2 * Copyright © 2020 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23 /**
24 * @file intel_measure.c
25 */
26
27 #include "intel_measure.h"
28
29 #include <errno.h>
30 #include <fcntl.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <sys/stat.h>
34 #include <sys/types.h>
35 #include <unistd.h>
36
37 #define __STDC_FORMAT_MACROS 1
38 #include <inttypes.h>
39
40 #include "dev/intel_device_info.h"
41 #include "util/debug.h"
42 #include "util/macros.h"
43 #include "util/u_debug.h"
44
45
46 static const struct debug_control debug_control[] = {
47 { "draw", INTEL_MEASURE_DRAW },
48 { "rt", INTEL_MEASURE_RENDERPASS },
49 { "shader", INTEL_MEASURE_SHADER },
50 { "batch", INTEL_MEASURE_BATCH },
51 { "frame", INTEL_MEASURE_FRAME },
52 { NULL, 0 }
53 };
54 static struct intel_measure_config config;
55
56 void
intel_measure_init(struct intel_measure_device * device)57 intel_measure_init(struct intel_measure_device *device)
58 {
59 static bool once = false;
60 const char *env = getenv("INTEL_MEASURE");
61 if (unlikely(!once)) {
62 once = true;
63 memset(&config, 0, sizeof(struct intel_measure_config));
64 if (!env)
65 return;
66
67 config.file = stderr;
68 config.flags = parse_debug_string(env, debug_control);
69 if (!config.flags)
70 config.flags = INTEL_MEASURE_DRAW;
71 config.enabled = true;
72 config.event_interval = 1;
73 config.control_fh = -1;
74
75 /* Overflows of the following defaults will drop data and generate a
76 * warning on the output filehandle.
77 */
78
79 /* default batch_size allows for 8k renders in a single batch */
80 const int DEFAULT_BATCH_SIZE = 16 * 1024;
81 config.batch_size = DEFAULT_BATCH_SIZE;
82
83 /* Default buffer_size allows for 16k batches per line of output in the
84 * csv. Overflow may occur for offscreen workloads or large 'interval'
85 * settings.
86 */
87 const int DEFAULT_BUFFER_SIZE = 16 * 1024;
88 config.buffer_size = DEFAULT_BUFFER_SIZE;
89
90 const char *filename = strstr(env, "file=");
91 const char *start_frame_s = strstr(env, "start=");
92 const char *count_frame_s = strstr(env, "count=");
93 const char *control_path = strstr(env, "control=");
94 const char *interval_s = strstr(env, "interval=");
95 const char *batch_size_s = strstr(env, "batch_size=");
96 const char *buffer_size_s = strstr(env, "buffer_size=");
97 while (true) {
98 char *sep = strrchr(env, ',');
99 if (sep == NULL)
100 break;
101 *sep = '\0';
102 }
103
104 if (filename && !__check_suid()) {
105 filename += 5;
106 config.file = fopen(filename, "w");
107 if (!config.file) {
108 fprintf(stderr, "INTEL_MEASURE failed to open output file %s: %s\n",
109 filename, strerror (errno));
110 abort();
111 }
112 }
113
114 if (start_frame_s) {
115 start_frame_s += 6;
116 const int start_frame = atoi(start_frame_s);
117 if (start_frame < 0) {
118 fprintf(stderr, "INTEL_MEASURE start frame may "
119 "not be negative: %d\n", start_frame);
120 abort();
121 }
122
123 config.start_frame = start_frame;
124 config.enabled = false;
125 }
126
127 if (count_frame_s) {
128 count_frame_s += 6;
129 const int count_frame = atoi(count_frame_s);
130 if (count_frame <= 0) {
131 fprintf(stderr, "INTEL_MEASURE count frame must be positive: %d\n",
132 count_frame);
133 abort();
134 }
135
136 config.end_frame = config.start_frame + count_frame;
137 }
138
139 if (control_path) {
140 control_path += 8;
141 if (mkfifoat(AT_FDCWD, control_path, O_CREAT | S_IRUSR | S_IWUSR)) {
142 if (errno != EEXIST) {
143 fprintf(stderr, "INTEL_MEASURE failed to create control "
144 "fifo %s: %s\n", control_path, strerror (errno));
145 abort();
146 }
147 }
148
149 config.control_fh = openat(AT_FDCWD, control_path,
150 O_RDONLY | O_NONBLOCK);
151 if (config.control_fh == -1) {
152 fprintf(stderr, "INTEL_MEASURE failed to open control fifo "
153 "%s: %s\n", control_path, strerror (errno));
154 abort();
155 }
156
157 /* when using a control fifo, do not start until the user triggers
158 * capture
159 */
160 config.enabled = false;
161 }
162
163 if (interval_s) {
164 interval_s += 9;
165 const int event_interval = atoi(interval_s);
166 if (event_interval < 1) {
167 fprintf(stderr, "INTEL_MEASURE event_interval must be positive: "
168 "%d\n", event_interval);
169 abort();
170 }
171 config.event_interval = event_interval;
172 }
173
174 if (batch_size_s) {
175 batch_size_s += 11;
176 const int batch_size = atoi(batch_size_s);
177 if (batch_size < DEFAULT_BATCH_SIZE) {
178 fprintf(stderr, "INTEL_MEASURE minimum batch_size is 4k: "
179 "%d\n", batch_size);
180 abort();
181 }
182 if (batch_size > DEFAULT_BATCH_SIZE * 1024) {
183 fprintf(stderr, "INTEL_MEASURE batch_size limited to 4M: "
184 "%d\n", batch_size);
185 abort();
186 }
187
188 config.batch_size = batch_size;
189 }
190
191 if (buffer_size_s) {
192 buffer_size_s += 12;
193 const int buffer_size = atoi(buffer_size_s);
194 if (buffer_size < DEFAULT_BUFFER_SIZE) {
195 fprintf(stderr, "INTEL_MEASURE minimum buffer_size is 1k: "
196 "%d\n", DEFAULT_BUFFER_SIZE);
197 }
198 if (buffer_size > DEFAULT_BUFFER_SIZE * 1024) {
199 fprintf(stderr, "INTEL_MEASURE buffer_size limited to 1M: "
200 "%d\n", buffer_size);
201 }
202
203 config.buffer_size = buffer_size;
204 }
205
206 fputs("draw_start,draw_end,frame,batch,"
207 "event_index,event_count,type,count,vs,tcs,tes,"
208 "gs,fs,cs,framebuffer,idle_ns,time_ns\n",
209 config.file);
210 }
211
212 device->config = NULL;
213 device->frame = 0;
214 pthread_mutex_init(&device->mutex, NULL);
215 list_inithead(&device->queued_snapshots);
216
217 if (env)
218 device->config = &config;
219 }
220
221 const char *
intel_measure_snapshot_string(enum intel_measure_snapshot_type type)222 intel_measure_snapshot_string(enum intel_measure_snapshot_type type)
223 {
224 const char *names[] = {
225 [INTEL_SNAPSHOT_UNDEFINED] = "undefined",
226 [INTEL_SNAPSHOT_BLIT] = "blit",
227 [INTEL_SNAPSHOT_CCS_AMBIGUATE] = "ccs ambiguate",
228 [INTEL_SNAPSHOT_CCS_COLOR_CLEAR] = "ccs color clear",
229 [INTEL_SNAPSHOT_CCS_PARTIAL_RESOLVE] = "ccs partial resolve",
230 [INTEL_SNAPSHOT_CCS_RESOLVE] = "ccs resolve",
231 [INTEL_SNAPSHOT_COMPUTE] = "compute",
232 [INTEL_SNAPSHOT_COPY] = "copy",
233 [INTEL_SNAPSHOT_DRAW] = "draw",
234 [INTEL_SNAPSHOT_HIZ_AMBIGUATE] = "hiz ambiguate",
235 [INTEL_SNAPSHOT_HIZ_CLEAR] = "hiz clear",
236 [INTEL_SNAPSHOT_HIZ_RESOLVE] = "hiz resolve",
237 [INTEL_SNAPSHOT_MCS_COLOR_CLEAR] = "mcs color clear",
238 [INTEL_SNAPSHOT_MCS_PARTIAL_RESOLVE] = "mcs partial resolve",
239 [INTEL_SNAPSHOT_SLOW_COLOR_CLEAR] = "slow color clear",
240 [INTEL_SNAPSHOT_SLOW_DEPTH_CLEAR] = "slow depth clear",
241 [INTEL_SNAPSHOT_SECONDARY_BATCH] = "secondary command buffer",
242 [INTEL_SNAPSHOT_END] = "end",
243 };
244 assert(type < ARRAY_SIZE(names));
245 assert(names[type] != NULL);
246 assert(type != INTEL_SNAPSHOT_UNDEFINED);
247 return names[type];
248 }
249
250 /**
251 * Indicate to the caller whether a new snapshot should be started.
252 *
253 * Callers provide rendering state to this method to determine whether the
254 * current start event should be skipped. Depending on the configuration
255 * flags, a new snapshot may start:
256 * - at every event
257 * - when the program changes
258 * - after a batch is submitted
259 * - at frame boundaries
260 *
261 * Returns true if a snapshot should be started.
262 */
263 bool
intel_measure_state_changed(const struct intel_measure_batch * batch,uintptr_t vs,uintptr_t tcs,uintptr_t tes,uintptr_t gs,uintptr_t fs,uintptr_t cs)264 intel_measure_state_changed(const struct intel_measure_batch *batch,
265 uintptr_t vs, uintptr_t tcs, uintptr_t tes,
266 uintptr_t gs, uintptr_t fs, uintptr_t cs)
267 {
268 if (batch->index == 0) {
269 /* always record the first event */
270 return true;
271 }
272
273 const struct intel_measure_snapshot *last_snap =
274 &batch->snapshots[batch->index - 1];
275
276 if (config.flags & INTEL_MEASURE_DRAW)
277 return true;
278
279 if (batch->index % 2 == 0) {
280 /* no snapshot is running, but we have a start event */
281 return true;
282 }
283
284 if (config.flags & (INTEL_MEASURE_FRAME | INTEL_MEASURE_BATCH)) {
285 /* only start collection when index == 0, at the beginning of a batch */
286 return false;
287 }
288
289 if (config.flags & INTEL_MEASURE_RENDERPASS) {
290 return ((last_snap->framebuffer != batch->framebuffer) ||
291 /* compute workloads are always in their own renderpass */
292 (cs != 0));
293 }
294
295 /* remaining comparisons check the state of the render pipeline for
296 * INTEL_MEASURE_PROGRAM
297 */
298 assert(config.flags & INTEL_MEASURE_SHADER);
299
300 if (!vs && !tcs && !tes && !gs && !fs && !cs) {
301 /* blorp always changes program */
302 return true;
303 }
304
305 return (last_snap->vs != (uintptr_t) vs ||
306 last_snap->tcs != (uintptr_t) tcs ||
307 last_snap->tes != (uintptr_t) tes ||
308 last_snap->gs != (uintptr_t) gs ||
309 last_snap->fs != (uintptr_t) fs ||
310 last_snap->cs != (uintptr_t) cs);
311 }
312
313 /**
314 * Notify intel_measure that a frame is about to begin.
315 *
316 * Configuration values and the control fifo may commence measurement at frame
317 * boundaries.
318 */
319 void
intel_measure_frame_transition(unsigned frame)320 intel_measure_frame_transition(unsigned frame)
321 {
322 if (frame == config.start_frame)
323 config.enabled = true;
324 else if (frame == config.end_frame)
325 config.enabled = false;
326
327 /* user commands to the control fifo will override any start/count
328 * environment settings
329 */
330 if (config.control_fh != -1) {
331 while (true) {
332 const unsigned BUF_SIZE = 128;
333 char buf[BUF_SIZE];
334 ssize_t bytes = read(config.control_fh, buf, BUF_SIZE - 1);
335 if (bytes == 0)
336 break;
337 if (bytes == -1) {
338 fprintf(stderr, "INTEL_MEASURE failed to read control fifo: %s\n",
339 strerror(errno));
340 abort();
341 }
342
343 buf[bytes] = '\0';
344 char *nptr = buf, *endptr = buf;
345 while (*nptr != '\0' && *endptr != '\0') {
346 long fcount = strtol(nptr, &endptr, 10);
347 if (nptr == endptr) {
348 config.enabled = false;
349 fprintf(stderr, "INTEL_MEASURE invalid frame count on "
350 "control fifo.\n");
351 lseek(config.control_fh, 0, SEEK_END);
352 break;
353 } else if (fcount == 0) {
354 config.enabled = false;
355 } else {
356 config.enabled = true;
357 config.end_frame = frame + fcount;
358 }
359
360 nptr = endptr + 1;
361 }
362 }
363 }
364 }
365
#define TIMESTAMP_BITS 36
/**
 * Return the distance from time0 to time1 in raw timestamp ticks.
 *
 * When time0 is greater than time1 the counter is taken to have wrapped once
 * at 2^TIMESTAMP_BITS, and that period is added to the difference.
 */
static uint64_t
raw_timestamp_delta(uint64_t time0, uint64_t time1)
{
   const uint64_t wrap = (time0 > time1) ? (1ULL << TIMESTAMP_BITS) : 0;
   return wrap + time1 - time0;
}
376
377 /**
378 * Verify that rendering has completed for the batch
379 *
380 * Rendering is complete when the last timestamp has been written.
381 */
382 bool
intel_measure_ready(struct intel_measure_batch * batch)383 intel_measure_ready(struct intel_measure_batch *batch)
384 {
385 assert(batch->timestamps);
386 assert(batch->index > 1);
387 return (batch->timestamps[batch->index - 1] != 0);
388 }
389
390 /**
391 * Submit completed snapshots for buffering.
392 *
393 * Snapshot data becomes available when asynchronous rendering completes.
394 * Depending on configuration, snapshot data may need to be collated before
395 * writing to the output file.
396 */
397 static void
intel_measure_push_result(struct intel_measure_device * device,struct intel_measure_batch * batch)398 intel_measure_push_result(struct intel_measure_device *device,
399 struct intel_measure_batch *batch)
400 {
401 struct intel_measure_ringbuffer *rb = device->ringbuffer;
402
403 uint64_t *timestamps = batch->timestamps;
404 assert(timestamps != NULL);
405 assert(timestamps[0] != 0);
406
407 for (int i = 0; i < batch->index; i += 2) {
408 const struct intel_measure_snapshot *begin = &batch->snapshots[i];
409 const struct intel_measure_snapshot *end = &batch->snapshots[i+1];
410
411 assert (end->type == INTEL_SNAPSHOT_END);
412
413 if (begin->type == INTEL_SNAPSHOT_SECONDARY_BATCH) {
414 assert(begin->secondary != NULL);
415 begin->secondary->batch_count = batch->batch_count;
416 intel_measure_push_result(device, begin->secondary);
417 continue;
418 }
419
420 const uint64_t prev_end_ts = rb->results[rb->head].end_ts;
421
422 /* advance ring buffer */
423 if (++rb->head == config.buffer_size)
424 rb->head = 0;
425 if (rb->head == rb->tail) {
426 static bool warned = false;
427 if (unlikely(!warned)) {
428 fprintf(config.file,
429 "WARNING: Buffered data exceeds INTEL_MEASURE limit: %d. "
430 "Data has been dropped. "
431 "Increase setting with INTEL_MEASURE=buffer_size={count}\n",
432 config.buffer_size);
433 warned = true;
434 }
435 break;
436 }
437
438 struct intel_measure_buffered_result *buffered_result =
439 &rb->results[rb->head];
440
441 memset(buffered_result, 0, sizeof(*buffered_result));
442 memcpy(&buffered_result->snapshot, begin,
443 sizeof(struct intel_measure_snapshot));
444 buffered_result->start_ts = timestamps[i];
445 buffered_result->end_ts = timestamps[i+1];
446 buffered_result->idle_duration =
447 raw_timestamp_delta(prev_end_ts, buffered_result->start_ts);
448 buffered_result->frame = batch->frame;
449 buffered_result->batch_count = batch->batch_count;
450 buffered_result->event_index = i / 2;
451 buffered_result->snapshot.event_count = end->event_count;
452 }
453 }
454
455 static unsigned
ringbuffer_size(const struct intel_measure_ringbuffer * rb)456 ringbuffer_size(const struct intel_measure_ringbuffer *rb)
457 {
458 unsigned head = rb->head;
459 if (head < rb->tail)
460 head += config.buffer_size;
461 return head - rb->tail;
462 }
463
464 static const struct intel_measure_buffered_result *
ringbuffer_pop(struct intel_measure_ringbuffer * rb)465 ringbuffer_pop(struct intel_measure_ringbuffer *rb)
466 {
467 if (rb->tail == rb->head) {
468 /* encountered ringbuffer overflow while processing events */
469 return NULL;
470 }
471
472 if (++rb->tail == config.buffer_size)
473 rb->tail = 0;
474 return &rb->results[rb->tail];
475 }
476
477 static const struct intel_measure_buffered_result *
ringbuffer_peek(const struct intel_measure_ringbuffer * rb,unsigned index)478 ringbuffer_peek(const struct intel_measure_ringbuffer *rb, unsigned index)
479 {
480 int result_offset = rb->tail + index + 1;
481 if (result_offset >= config.buffer_size)
482 result_offset -= config.buffer_size;
483 return &rb->results[result_offset];
484 }
485
486
487 /**
488 * Determine the number of buffered events that must be combined for the next
489 * line of csv output. Returns 0 if more events are needed.
490 */
491 static unsigned
buffered_event_count(struct intel_measure_device * device)492 buffered_event_count(struct intel_measure_device *device)
493 {
494 const struct intel_measure_ringbuffer *rb = device->ringbuffer;
495 const unsigned buffered_event_count = ringbuffer_size(rb);
496 if (buffered_event_count == 0) {
497 /* no events to collect */
498 return 0;
499 }
500
501 /* count the number of buffered events required to meet the configuration */
502 if (config.flags & (INTEL_MEASURE_DRAW |
503 INTEL_MEASURE_RENDERPASS |
504 INTEL_MEASURE_SHADER)) {
505 /* For these flags, every buffered event represents a line in the
506 * output. None of these events span batches. If the event interval
507 * crosses a batch boundary, then the next interval starts with the new
508 * batch.
509 */
510 return 1;
511 }
512
513 const unsigned start_frame = ringbuffer_peek(rb, 0)->frame;
514 if (config.flags & INTEL_MEASURE_BATCH) {
515 /* each buffered event is a command buffer. The number of events to
516 * process is the same as the interval, unless the interval crosses a
517 * frame boundary
518 */
519 if (buffered_event_count < config.event_interval) {
520 /* not enough events */
521 return 0;
522 }
523
524 /* Imperfect frame tracking requires us to allow for *older* frames */
525 if (ringbuffer_peek(rb, config.event_interval - 1)->frame <= start_frame) {
526 /* No frame transition. The next {interval} events should be combined. */
527 return config.event_interval;
528 }
529
530 /* Else a frame transition occurs within the interval. Find the
531 * transition, so the following line of output begins with the batch
532 * that starts the new frame.
533 */
534 for (int event_index = 1;
535 event_index <= config.event_interval;
536 ++event_index) {
537 if (ringbuffer_peek(rb, event_index)->frame > start_frame)
538 return event_index;
539 }
540
541 assert(false);
542 }
543
544 /* Else we need to search buffered events to find the matching frame
545 * transition for our interval.
546 */
547 assert(config.flags & INTEL_MEASURE_FRAME);
548 for (int event_index = 1;
549 event_index < buffered_event_count;
550 ++event_index) {
551 const int latest_frame = ringbuffer_peek(rb, event_index)->frame;
552 if (latest_frame - start_frame >= config.event_interval)
553 return event_index;
554 }
555
556 return 0;
557 }
558
559 /**
560 * Take result_count events from the ringbuffer and output them as a single
561 * line.
562 */
563 static void
print_combined_results(struct intel_measure_device * measure_device,int result_count,struct intel_device_info * info)564 print_combined_results(struct intel_measure_device *measure_device,
565 int result_count,
566 struct intel_device_info *info)
567 {
568 if (result_count == 0)
569 return;
570
571 struct intel_measure_ringbuffer *result_rb = measure_device->ringbuffer;
572 assert(ringbuffer_size(result_rb) >= result_count);
573 const struct intel_measure_buffered_result* start_result =
574 ringbuffer_pop(result_rb);
575 const struct intel_measure_buffered_result* current_result = start_result;
576
577 if (start_result == NULL)
578 return;
579 --result_count;
580
581 uint64_t duration_ts = raw_timestamp_delta(start_result->start_ts,
582 current_result->end_ts);
583 unsigned event_count = start_result->snapshot.event_count;
584 while (result_count-- > 0) {
585 assert(ringbuffer_size(result_rb) > 0);
586 current_result = ringbuffer_pop(result_rb);
587 if (current_result == NULL)
588 return;
589 duration_ts += raw_timestamp_delta(current_result->start_ts,
590 current_result->end_ts);
591 event_count += current_result->snapshot.event_count;
592 }
593
594 const struct intel_measure_snapshot *begin = &start_result->snapshot;
595 fprintf(config.file, "%"PRIu64",%"PRIu64",%u,%u,%u,%u,%s,%u,"
596 "0x%"PRIxPTR",0x%"PRIxPTR",0x%"PRIxPTR",0x%"PRIxPTR",0x%"PRIxPTR","
597 "0x%"PRIxPTR",0x%"PRIxPTR",%"PRIu64",%"PRIu64"\n",
598 start_result->start_ts, current_result->end_ts,
599 start_result->frame, start_result->batch_count,
600 start_result->event_index, event_count,
601 begin->event_name, begin->count,
602 begin->vs, begin->tcs, begin->tes, begin->gs, begin->fs, begin->cs,
603 begin->framebuffer,
604 intel_device_info_timebase_scale(info, start_result->idle_duration),
605 intel_device_info_timebase_scale(info, duration_ts));
606 }
607
/**
 * Print every line of output that the buffered events can currently form.
 *
 * Stops as soon as buffered_event_count() reports that more events are
 * needed.
 */
static void
intel_measure_print(struct intel_measure_device *device,
                    struct intel_device_info *info)
{
   int pending;

   while ((pending = buffered_event_count(device)) != 0)
      print_combined_results(device, pending, info);
}
622
623 /**
624 * Collect snapshots from completed command buffers and submit them to
625 * intel_measure for printing.
626 */
627 void
intel_measure_gather(struct intel_measure_device * measure_device,struct intel_device_info * info)628 intel_measure_gather(struct intel_measure_device *measure_device,
629 struct intel_device_info *info)
630 {
631 pthread_mutex_lock(&measure_device->mutex);
632
633 /* Iterate snapshots and collect if ready. Each snapshot queue will be
634 * in-order, but we must determine which queue has the oldest batch.
635 */
636 /* iterate snapshots and collect if ready */
637 while (!list_is_empty(&measure_device->queued_snapshots)) {
638 struct intel_measure_batch *batch =
639 list_first_entry(&measure_device->queued_snapshots,
640 struct intel_measure_batch, link);
641
642 if (!intel_measure_ready(batch)) {
643 /* command buffer has begun execution on the gpu, but has not
644 * completed.
645 */
646 break;
647 }
648
649 list_del(&batch->link);
650 assert(batch->index % 2 == 0);
651
652 intel_measure_push_result(measure_device, batch);
653
654 batch->index = 0;
655 batch->frame = 0;
656 }
657
658 intel_measure_print(measure_device, info);
659 pthread_mutex_unlock(&measure_device->mutex);
660 }
661
662