/*
 * Copyright © 2020 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * @file intel_measure.c
 *
 * Timestamp collection for draws, render passes, shaders, batches, and
 * frames, controlled at runtime through the INTEL_MEASURE environment
 * variable.
 */

#include "intel_measure.h"

#include <errno.h>
#include <fcntl.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

#define __STDC_FORMAT_MACROS 1
#include <inttypes.h>

#include "dev/intel_device_info.h"
#include "util/os_time.h"
#include "util/u_debug.h"
#include "util/macros.h"

static const struct debug_control debug_control[] = {
   { "draw", INTEL_MEASURE_DRAW },
   { "rt", INTEL_MEASURE_RENDERPASS },
   { "shader", INTEL_MEASURE_SHADER },
   { "batch", INTEL_MEASURE_BATCH },
   { "frame", INTEL_MEASURE_FRAME },
   { NULL, 0 }
};
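
/* INTEL_MEASURE selects one of the flags above and may append the options
 * parsed in intel_measure_init() below.  A plausible invocation (the path
 * and values here are illustrative only):
 *
 *    INTEL_MEASURE=draw,file=/tmp/measure.csv,start=100,count=50 ./app
 *
 * collects per-draw timing for 50 frames beginning at frame 100, writing
 * csv output to /tmp/measure.csv instead of the default stderr.
 */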
static struct intel_measure_config config;

void
intel_measure_init(struct intel_measure_device *device)
{
   static bool once = false;
   const char *env = getenv("INTEL_MEASURE");
   if (unlikely(!once)) {
      once = true;
      memset(&config, 0, sizeof(struct intel_measure_config));
      if (!env)
         return;

      char env_copy[1024];
      strncpy(env_copy, env, 1024);
      env_copy[1023] = '\0';

      config.file = stderr;
      config.flags = parse_debug_string(env_copy, debug_control);
      if (!config.flags)
         config.flags = INTEL_MEASURE_DRAW;
      config.enabled = true;
      config.event_interval = 1;
      config.control_fh = -1;

      /* Overflows of the following defaults will drop data and generate a
       * warning on the output filehandle.
       */

      /* default batch_size allows for 32k renders in a single batch */
      const int MINIMUM_BATCH_SIZE = 1024;
      const int DEFAULT_BATCH_SIZE = 64 * 1024;
      config.batch_size = DEFAULT_BATCH_SIZE;

      /* Default buffer_size allows for 64k batches per line of output in the
       * csv. Overflow may occur for offscreen workloads or large 'interval'
       * settings.
       */
      const int MINIMUM_BUFFER_SIZE = 1024;
      const int DEFAULT_BUFFER_SIZE = 64 * 1024;
      config.buffer_size = DEFAULT_BUFFER_SIZE;

      const char *filename = strstr(env_copy, "file=");
      const char *start_frame_s = strstr(env_copy, "start=");
      const char *count_frame_s = strstr(env_copy, "count=");
      const char *control_path = strstr(env_copy, "control=");
      const char *interval_s = strstr(env_copy, "interval=");
      const char *batch_size_s = strstr(env_copy, "batch_size=");
      const char *buffer_size_s = strstr(env_copy, "buffer_size=");
      const char *cpu_s = strstr(env_copy, "cpu");
      const char *no_ogl = strstr(env_copy, "nogl");
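      /* The option pointers above still reference the intact string.
       * Replacing each ',' with '\0' below (working back from the end)
       * terminates every option in place, so the atoi()/strdup() calls
       * that follow read only their own value.
       */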
      while (true) {
         char *sep = strrchr(env_copy, ',');
         if (sep == NULL)
            break;
         *sep = '\0';
      }

      if (no_ogl && device->type == INTEL_MEASURE_DEVICE_OGL) {
         config.enabled = false;
         return;
      }

      if (filename && __normal_user()) {
         filename += 5;
         config.deferred_create_filename = strdup(filename);
      }

      if (start_frame_s) {
         start_frame_s += 6;
         const int start_frame = atoi(start_frame_s);
         if (start_frame < 0) {
            fprintf(stderr, "INTEL_MEASURE start frame may "
                    "not be negative: %d\n", start_frame);
            abort();
         }

         config.start_frame = start_frame;
         config.enabled = false;
      }

      if (count_frame_s) {
         count_frame_s += 6;
         const int count_frame = atoi(count_frame_s);
         if (count_frame <= 0) {
            fprintf(stderr, "INTEL_MEASURE count frame must be positive: %d\n",
                    count_frame);
            abort();
         }

         config.end_frame = config.start_frame + count_frame;
      }

      if (control_path) {
         control_path += 8;
         /* mkfifoat() takes permission bits only, not open(2) flags */
         if (mkfifoat(AT_FDCWD, control_path, S_IRUSR | S_IWUSR)) {
            if (errno != EEXIST) {
               fprintf(stderr, "INTEL_MEASURE failed to create control "
                       "fifo %s: %s\n", control_path, strerror(errno));
               abort();
            }
         }

         config.control_fh = openat(AT_FDCWD, control_path,
                                    O_RDONLY | O_NONBLOCK);
         if (config.control_fh == -1) {
            fprintf(stderr, "INTEL_MEASURE failed to open control fifo "
                    "%s: %s\n", control_path, strerror(errno));
            abort();
         }

         /* when using a control fifo, do not start until the user triggers
          * capture
          */
         config.enabled = false;
      }

      if (interval_s) {
         interval_s += 9;
         const int event_interval = atoi(interval_s);
         if (event_interval < 1) {
            fprintf(stderr, "INTEL_MEASURE event_interval must be positive: "
                    "%d\n", event_interval);
            abort();
         }
         config.event_interval = event_interval;
      }

      if (batch_size_s) {
         batch_size_s += 11;
         const int batch_size = atoi(batch_size_s);
         if (batch_size < MINIMUM_BATCH_SIZE) {
            fprintf(stderr, "INTEL_MEASURE minimum batch_size is 1k: "
                    "%d\n", batch_size);
            abort();
         }
         if (batch_size > MINIMUM_BATCH_SIZE * 4 * 1024) {
            fprintf(stderr, "INTEL_MEASURE batch_size limited to 4M: "
                    "%d\n", batch_size);
            abort();
         }

         config.batch_size = batch_size;
      }

      if (buffer_size_s) {
         buffer_size_s += 12;
         const int buffer_size = atoi(buffer_size_s);
         if (buffer_size < MINIMUM_BUFFER_SIZE) {
            fprintf(stderr, "INTEL_MEASURE minimum buffer_size is 1k: "
                    "%d\n", buffer_size);
         }
         if (buffer_size > MINIMUM_BUFFER_SIZE * 1024) {
            fprintf(stderr, "INTEL_MEASURE buffer_size limited to 1M: "
                    "%d\n", buffer_size);
         }

         config.buffer_size = buffer_size;
      }

      if (cpu_s) {
         config.cpu_measure = true;
      }
   }

   device->config = NULL;
   device->frame = 0;
   device->render_pass_count = 0;
   device->release_batch = NULL;
   pthread_mutex_init(&device->mutex, NULL);
   list_inithead(&device->queued_snapshots);

   if (env)
      device->config = &config;
}
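
/* A rough sketch of the expected call sequence for this interface, inferred
 * from the entry points in this file (illustrative, not normative):
 *
 *    intel_measure_init(&device->measure);          // once per device
 *    intel_measure_frame_transition(frame);         // at each frame boundary
 *    intel_measure_gather(&device->measure, info);  // drain completed
 *                                                   // snapshots periodically
 */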

const char *
intel_measure_snapshot_string(enum intel_measure_snapshot_type type)
{
   const char *names[] = {
      [INTEL_SNAPSHOT_UNDEFINED] = "undefined",
      [INTEL_SNAPSHOT_BLIT] = "blit",
      [INTEL_SNAPSHOT_CCS_AMBIGUATE] = "ccs ambiguate",
      [INTEL_SNAPSHOT_CCS_COLOR_CLEAR] = "ccs color clear",
      [INTEL_SNAPSHOT_CCS_PARTIAL_RESOLVE] = "ccs partial resolve",
      [INTEL_SNAPSHOT_CCS_RESOLVE] = "ccs resolve",
      [INTEL_SNAPSHOT_COMPUTE] = "compute",
      [INTEL_SNAPSHOT_COPY] = "copy",
      [INTEL_SNAPSHOT_DRAW] = "draw",
      [INTEL_SNAPSHOT_HIZ_AMBIGUATE] = "hiz ambiguate",
      [INTEL_SNAPSHOT_HIZ_CLEAR] = "hiz clear",
      [INTEL_SNAPSHOT_HIZ_RESOLVE] = "hiz resolve",
      [INTEL_SNAPSHOT_MCS_AMBIGUATE] = "mcs ambiguate",
      [INTEL_SNAPSHOT_MCS_COLOR_CLEAR] = "mcs color clear",
      [INTEL_SNAPSHOT_MCS_PARTIAL_RESOLVE] = "mcs partial resolve",
      [INTEL_SNAPSHOT_SLOW_COLOR_CLEAR] = "slow color clear",
      [INTEL_SNAPSHOT_SLOW_DEPTH_CLEAR] = "slow depth clear",
      [INTEL_SNAPSHOT_SECONDARY_BATCH] = "secondary command buffer",
      [INTEL_SNAPSHOT_END] = "end",
   };
   assert(type < ARRAY_SIZE(names));
   assert(names[type] != NULL);
   assert(type != INTEL_SNAPSHOT_UNDEFINED);
   return names[type];
}

/**
 * Indicate to the caller whether a new snapshot should be started.
 *
 * Callers provide rendering state to this function to determine whether the
 * current start event should be skipped. Depending on the configuration
 * flags, a new snapshot may start:
 *   - at every event
 *   - when the program changes
 *   - after a batch is submitted
 *   - at frame boundaries
 *
 * Returns true if a snapshot should be started.
 */
bool
intel_measure_state_changed(const struct intel_measure_batch *batch,
                            uint32_t vs, uint32_t tcs, uint32_t tes,
                            uint32_t gs, uint32_t fs, uint32_t cs,
                            uint32_t ms, uint32_t ts)
{
   if (batch->index == 0) {
      /* always record the first event */
      return true;
   }

   const struct intel_measure_snapshot *last_snap =
      &batch->snapshots[batch->index - 1];

   if (config.flags & INTEL_MEASURE_DRAW)
      return true;

   if (batch->index % 2 == 0) {
      /* no snapshot is running, but we have a start event */
      return true;
   }

   if (config.flags & (INTEL_MEASURE_FRAME | INTEL_MEASURE_BATCH)) {
      /* only start collection when index == 0, at the beginning of a batch */
      return false;
   }

   if (config.flags & INTEL_MEASURE_RENDERPASS) {
      bool new_renderpass = !cs && last_snap->renderpass != batch->renderpass;
      bool new_compute_block = cs && last_snap->type != INTEL_SNAPSHOT_COMPUTE;
      return new_renderpass || new_compute_block;
   }

   /* remaining comparisons check the state of the render pipeline for
    * INTEL_MEASURE_SHADER
    */
   assert(config.flags & INTEL_MEASURE_SHADER);

   if (!vs && !tcs && !tes && !gs && !fs && !cs && !ms && !ts) {
      /* blorp always changes program */
      return true;
   }

   return (last_snap->vs != vs ||
           last_snap->tcs != tcs ||
           last_snap->tes != tes ||
           last_snap->gs != gs ||
           last_snap->fs != fs ||
           last_snap->cs != cs ||
           last_snap->ms != ms ||
           last_snap->ts != ts);
}
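
/* Snapshots are recorded in begin/end pairs: even indices within
 * intel_measure_batch::snapshots hold start events, odd indices hold
 * INTEL_SNAPSHOT_END events.  intel_measure_push_result() below relies on
 * this pairing when walking the array two entries at a time.
 */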

/**
 * Notify intel_measure that a frame is about to begin.
 *
 * Configuration values and the control fifo may commence measurement at
 * frame boundaries.
 */
void
intel_measure_frame_transition(unsigned frame)
{
   if (frame == config.start_frame)
      config.enabled = true;
   else if (frame == config.end_frame)
      config.enabled = false;

   /* user commands to the control fifo will override any start/count
    * environment settings
    */
   if (config.control_fh != -1) {
      while (true) {
         const unsigned BUF_SIZE = 128;
         char buf[BUF_SIZE];
         ssize_t bytes = read(config.control_fh, buf, BUF_SIZE - 1);
         if (bytes == 0)
            break;
         if (bytes == -1) {
            fprintf(stderr, "INTEL_MEASURE failed to read control fifo: %s\n",
                    strerror(errno));
            abort();
         }

         buf[bytes] = '\0';
         char *nptr = buf, *endptr = buf;
         while (*nptr != '\0' && *endptr != '\0') {
            long fcount = strtol(nptr, &endptr, 10);
            if (nptr == endptr) {
               config.enabled = false;
               fprintf(stderr, "INTEL_MEASURE invalid frame count on "
                       "control fifo.\n");
               lseek(config.control_fh, 0, SEEK_END);
               break;
            } else if (fcount == 0) {
               config.enabled = false;
            } else {
               config.enabled = true;
               config.end_frame = frame + fcount;
            }

            nptr = endptr + 1;
         }
      }
   }
}
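
/* With INTEL_MEASURE=control={path}, capture can be toggled at runtime by
 * writing decimal frame counts to the fifo.  For example (shell,
 * illustrative):
 *
 *    echo 10 > /path/to/fifo    # capture the next 10 frames
 *    echo 0  > /path/to/fifo    # stop capturing
 */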

#define TIMESTAMP_BITS 36
static uint64_t
raw_timestamp_delta(uint64_t time0, uint64_t time1)
{
   if (time0 > time1) {
      return (1ULL << TIMESTAMP_BITS) + time1 - time0;
   } else {
      return time1 - time0;
   }
}
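
/* Example: with 36-bit timestamps, a counter that wraps between samples
 * (say time0 = (1ULL << 36) - 100 and time1 = 50) yields
 * (1ULL << 36) + 50 - ((1ULL << 36) - 100) = 150 ticks, as intended.
 */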

/**
 * Verify that rendering has completed for the batch.
 *
 * Rendering is complete when the last timestamp has been written.
 */
bool
intel_measure_ready(struct intel_measure_batch *batch)
{
   assert(batch->timestamps);
   assert(batch->index > 1);
   return (batch->timestamps[batch->index - 1] != 0);
}

/**
 * Submit completed snapshots for buffering.
 *
 * Snapshot data becomes available when asynchronous rendering completes.
 * Depending on configuration, snapshot data may need to be collated before
 * writing to the output file.
 */
static void
intel_measure_push_result(struct intel_measure_device *device,
                          struct intel_measure_batch *batch)
{
   struct intel_measure_ringbuffer *rb = device->ringbuffer;

   uint64_t *timestamps = batch->timestamps;
   assert(timestamps != NULL);
   assert(batch->index == 0 || timestamps[0] != 0);

   for (int i = 0; i < batch->index; i += 2) {
      const struct intel_measure_snapshot *begin = &batch->snapshots[i];
      const struct intel_measure_snapshot *end = &batch->snapshots[i+1];

      assert(end->type == INTEL_SNAPSHOT_END);

      if (begin->type == INTEL_SNAPSHOT_SECONDARY_BATCH) {
         assert(begin->secondary != NULL);
         begin->secondary->batch_count = batch->batch_count;
         begin->secondary->batch_size = 0;
         begin->secondary->primary_renderpass = batch->renderpass;
         intel_measure_push_result(device, begin->secondary);
         continue;
      }

      const uint64_t prev_end_ts = rb->results[rb->head].end_ts;

      /* advance ring buffer */
      if (++rb->head == config.buffer_size)
         rb->head = 0;
      if (rb->head == rb->tail) {
         static bool warned = false;
         if (unlikely(!warned)) {
            fprintf(config.file,
                    "WARNING: Buffered data exceeds INTEL_MEASURE limit: %d. "
                    "Data has been dropped. "
                    "Increase setting with INTEL_MEASURE=buffer_size={count}\n",
                    config.buffer_size);
            warned = true;
         }
         break;
      }

      struct intel_measure_buffered_result *buffered_result =
         &rb->results[rb->head];

      memset(buffered_result, 0, sizeof(*buffered_result));
      memcpy(&buffered_result->snapshot, begin,
             sizeof(struct intel_measure_snapshot));
      buffered_result->start_ts = timestamps[i];
      buffered_result->end_ts = timestamps[i+1];
      buffered_result->idle_duration =
         raw_timestamp_delta(prev_end_ts, buffered_result->start_ts);
      buffered_result->frame = batch->frame;
      buffered_result->batch_count = batch->batch_count;
      buffered_result->batch_size = batch->batch_size;
      buffered_result->primary_renderpass = batch->primary_renderpass;
      buffered_result->event_index = i / 2;
      buffered_result->snapshot.event_count = end->event_count;
   }
}

static unsigned
ringbuffer_size(const struct intel_measure_ringbuffer *rb)
{
   unsigned head = rb->head;
   if (head < rb->tail)
      head += config.buffer_size;
   return head - rb->tail;
}

static const struct intel_measure_buffered_result *
ringbuffer_pop(struct intel_measure_ringbuffer *rb)
{
   if (rb->tail == rb->head) {
      /* encountered ringbuffer overflow while processing events */
      return NULL;
   }

   if (++rb->tail == config.buffer_size)
      rb->tail = 0;
   return &rb->results[rb->tail];
}

static const struct intel_measure_buffered_result *
ringbuffer_peek(const struct intel_measure_ringbuffer *rb, unsigned index)
{
   int result_offset = rb->tail + index + 1;
   if (result_offset >= config.buffer_size)
      result_offset -= config.buffer_size;
   return &rb->results[result_offset];
}
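
/* Ring buffer convention used above: head indexes the most recently written
 * result and tail the most recently consumed one; both are incremented
 * before use.  When an advancing head collides with tail,
 * intel_measure_push_result() stops buffering and warns that data has been
 * dropped.
 */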

/**
 * Determine the number of buffered events that must be combined for the next
 * line of csv output. Returns 0 if more events are needed.
 */
static unsigned
buffered_event_count(struct intel_measure_device *device)
{
   const struct intel_measure_ringbuffer *rb = device->ringbuffer;
   const unsigned buffered_event_count = ringbuffer_size(rb);
   if (buffered_event_count == 0) {
      /* no events to collect */
      return 0;
   }

   /* count the number of buffered events required to meet the configuration */
   if (config.flags & (INTEL_MEASURE_DRAW |
                       INTEL_MEASURE_RENDERPASS |
                       INTEL_MEASURE_SHADER)) {
      /* For these flags, every buffered event represents a line in the
       * output. None of these events span batches. If the event interval
       * crosses a batch boundary, then the next interval starts with the new
       * batch.
       */
      return 1;
   }

   const unsigned start_frame = ringbuffer_peek(rb, 0)->frame;
   if (config.flags & INTEL_MEASURE_BATCH) {
      /* each buffered event is a command buffer. The number of events to
       * process is the same as the interval, unless the interval crosses a
       * frame boundary.
       */
      if (buffered_event_count < config.event_interval) {
         /* not enough events */
         return 0;
      }

      /* Imperfect frame tracking requires us to allow for *older* frames */
      if (ringbuffer_peek(rb, config.event_interval - 1)->frame <= start_frame) {
         /* No frame transition. The next {interval} events should be combined. */
         return config.event_interval;
      }

      /* Else a frame transition occurs within the interval. Find the
       * transition, so the following line of output begins with the batch
       * that starts the new frame.
       */
      for (int event_index = 1;
           event_index <= config.event_interval;
           ++event_index) {
         if (ringbuffer_peek(rb, event_index)->frame > start_frame)
            return event_index;
      }

      assert(false);
   }

   /* Else we need to search buffered events to find the matching frame
    * transition for our interval.
    */
   assert(config.flags & INTEL_MEASURE_FRAME);
   for (int event_index = 1;
        event_index < buffered_event_count;
        ++event_index) {
      const int latest_frame = ringbuffer_peek(rb, event_index)->frame;
      if (latest_frame - start_frame >= config.event_interval)
         return event_index;
   }

   return 0;
}

/**
 * Take result_count events from the ringbuffer and output them as a single
 * line.
 */
static void
print_combined_results(struct intel_measure_device *measure_device,
                       int result_count,
                       const struct intel_device_info *info)
{
   if (result_count == 0)
      return;

   struct intel_measure_ringbuffer *result_rb = measure_device->ringbuffer;
   assert(ringbuffer_size(result_rb) >= result_count);
   const struct intel_measure_buffered_result *start_result =
      ringbuffer_pop(result_rb);
   const struct intel_measure_buffered_result *current_result = start_result;

   if (start_result == NULL)
      return;
   --result_count;

   uint64_t duration_ts = raw_timestamp_delta(start_result->start_ts,
                                              current_result->end_ts);
   unsigned event_count = start_result->snapshot.event_count;
   while (result_count-- > 0) {
      assert(ringbuffer_size(result_rb) > 0);
      current_result = ringbuffer_pop(result_rb);
      if (current_result == NULL)
         return;
      duration_ts += raw_timestamp_delta(current_result->start_ts,
                                         current_result->end_ts);
      event_count += current_result->snapshot.event_count;
   }

   uint64_t duration_idle_ns =
      intel_device_info_timebase_scale(info, start_result->idle_duration);
   uint64_t duration_time_ns =
      intel_device_info_timebase_scale(info, duration_ts);
   const struct intel_measure_snapshot *begin = &start_result->snapshot;
   uint32_t renderpass = (start_result->primary_renderpass)
      ? start_result->primary_renderpass : begin->renderpass;
   fprintf(config.file, "%"PRIu64",%"PRIu64",%u,%u,%"PRIu64",%u,%u,%u,%s,%u,"
           "0x%x,0x%x,0x%x,0x%x,0x%x,0x%x,0x%x,0x%x,%.3lf,%.3lf\n",
           start_result->start_ts, current_result->end_ts,
           start_result->frame,
           start_result->batch_count, start_result->batch_size,
           renderpass, start_result->event_index, event_count,
           begin->event_name, begin->count,
           begin->vs, begin->tcs, begin->tes, begin->gs,
           begin->fs, begin->cs, begin->ms, begin->ts,
           (double)duration_idle_ns / 1000.0,
           (double)duration_time_ns / 1000.0);
}
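
/* The fields written above correspond, in order, to the csv header emitted
 * by intel_measure_print() for GPU measurements: draw_start, draw_end,
 * frame, batch, batch_size, renderpass, event_index, event_count, type,
 * count, vs, tcs, tes, gs, fs, cs, ms, ts, idle_us, time_us.
 */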

/**
 * Write data for a cpu event.
 */
void
intel_measure_print_cpu_result(unsigned int frame,
                               unsigned int batch_count,
                               uint64_t batch_size,
                               unsigned int event_index,
                               unsigned int event_count,
                               unsigned int count,
                               const char *event_name)
{
   assert(config.cpu_measure);
   uint64_t start_ns = os_time_get_nano();

   fprintf(config.file, "%"PRIu64",%u,%3u,%"PRIu64",%3u,%u,%s,%u\n",
           start_ns, frame, batch_count, batch_size,
           event_index, event_count, event_name, count);
}

/**
 * Empty the ringbuffer of events that can be printed.
 */
static void
intel_measure_print(struct intel_measure_device *device,
                    const struct intel_device_info *info)
{
   if (unlikely(config.deferred_create_filename)) {
      config.file = fopen(config.deferred_create_filename, "w");
      if (!config.file) {
         fprintf(stderr, "INTEL_MEASURE failed to open output file %s: %s\n",
                 config.deferred_create_filename, strerror(errno));
         abort();
      }
      free(config.deferred_create_filename);
      config.deferred_create_filename = NULL;

      if (!config.cpu_measure)
         fputs("draw_start,draw_end,frame,batch,batch_size,renderpass,"
               "event_index,event_count,type,count,vs,tcs,tes,"
               "gs,fs,cs,ms,ts,idle_us,time_us\n",
               config.file);
      else
         fputs("draw_start,frame,batch,batch_size,event_index,event_count,"
               "type,count\n",
               config.file);
   }

   while (true) {
      const int events_to_combine = buffered_event_count(device);
      if (events_to_combine == 0)
         break;
      print_combined_results(device, events_to_combine, info);
   }
}

/**
 * Collect snapshots from completed command buffers and submit them to
 * intel_measure for printing.
 */
void
intel_measure_gather(struct intel_measure_device *measure_device,
                     const struct intel_device_info *info)
{
   pthread_mutex_lock(&measure_device->mutex);

   /* Iterate snapshots and collect if ready. Each snapshot queue will be
    * in-order, but we must determine which queue has the oldest batch.
    */
   while (!list_is_empty(&measure_device->queued_snapshots)) {
      struct intel_measure_batch *batch =
         list_first_entry(&measure_device->queued_snapshots,
                          struct intel_measure_batch, link);

      if (!intel_measure_ready(batch)) {
         /* command buffer has begun execution on the gpu, but has not
          * completed
          */
         break;
      }

      list_del(&batch->link);
      assert(batch->index % 2 == 0);

      intel_measure_push_result(measure_device, batch);

      batch->index = 0;
      batch->frame = 0;
      if (measure_device->release_batch)
         measure_device->release_batch(batch);
   }

   intel_measure_print(measure_device, info);
   pthread_mutex_unlock(&measure_device->mutex);
}