1 // SPDX-License-Identifier: LGPL-2.1
2 /*
3 * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
4 *
5 */
6 #define _LARGEFILE64_SOURCE
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <fcntl.h>
10 #include <time.h>
11 #include <poll.h>
12 #include <unistd.h>
13 #include <errno.h>
14
15 #include "tracefs.h"
16 #include "trace-cmd-private.h"
17 #include "trace-cmd-local.h"
18 #include "event-utils.h"
19
/* F_GETPIPE_SZ was introduced in 2.6.35, older systems don't have it */
#ifndef F_GETPIPE_SZ
# define F_GETPIPE_SZ 1032 /* The Linux number for the option */
#endif

/* Fallback values for the splice() flags when the system headers lack them */
#ifndef SPLICE_F_MOVE
# define SPLICE_F_MOVE 1
# define SPLICE_F_NONBLOCK 2
# define SPLICE_F_MORE 4
# define SPLICE_F_GIFT 8
#endif

/* Timeout, in milliseconds, handed to poll() in direct_splice_data() */
#define POLL_TIMEOUT_MS 1000
33
/*
 * State of one recorder: where raw trace data is read from, where it is
 * written to, and the bookkeeping used to bound the size of the output.
 */
struct tracecmd_recorder {
	int		fd;		/* output fd currently written to (fd1 or fd2) */
	int		fd1;		/* first output fd; final data ends up here */
	int		fd2;		/* second output fd, used when max is set */
	int		trace_fd;	/* source of raw trace data, -1 when unset */
	int		brass[2];	/* pipe used as intermediate for splice() */
	int		pipe_size;	/* length requested per splice() call */
	int		page_size;	/* getpagesize() taken at creation */
	int		cpu;		/* CPU this recorder was created for */
	int		stop;		/* non-zero ends the recording loop */
	int		max;		/* pages per file before swapping, 0: no limit */
	int		pages;		/* pages counted for the current file */
	int		count;		/* bytes counted toward the next page */
	unsigned int	fd_flags;	/* splice() flags when writing to fd */
	unsigned int	trace_fd_flags;	/* splice() flags when reading trace_fd */
	unsigned int	flags;		/* TRACECMD_RECORD_* flags from the creator */
};
51
/*
 * Copy the whole content of @src onto @dst.
 *
 * @src is rewound to its beginning first.  Data is written at the current
 * file offset of @dst, so the caller decides where the copy lands.
 * @size is the size in bytes of the bounce buffer used per read().
 *
 * Short writes are completed and reads/writes interrupted by a signal are
 * retried, so a chunk is never silently dropped.
 *
 * Returns 0 once @src reaches end of file, or -1 if @size is not positive
 * or a read()/write() fails.
 */
static int append_file(int size, int dst, int src)
{
	if (size <= 0)
		return -1;

	char buf[size];
	ssize_t r;

	/* A failed rewind is not fatal by itself; read() reports real errors */
	lseek64(src, 0, SEEK_SET);

	for (;;) {
		ssize_t off = 0;

		r = read(src, buf, size);
		if (r < 0) {
			if (errno == EINTR)
				continue;
			return -1;
		}
		if (r == 0)
			return 0;

		/* write() may accept only part of the chunk: push the rest */
		while (off < r) {
			ssize_t w = write(dst, buf + off, r - off);

			if (w < 0) {
				if (errno == EINTR)
					continue;
				return -1;
			}
			/* No progress at all: bail out instead of spinning */
			if (w == 0)
				return -1;
			off += w;
		}
	}
}
70
tracecmd_free_recorder(struct tracecmd_recorder * recorder)71 void tracecmd_free_recorder(struct tracecmd_recorder *recorder)
72 {
73 if (!recorder)
74 return;
75
76 if (recorder->max) {
77 /* Need to put everything into fd1 */
78 if (recorder->fd == recorder->fd1) {
79 int ret;
80 /*
81 * Crap, the older data is in fd2, and we need
82 * to append fd1 onto it, and then copy over to fd1
83 */
84 ret = append_file(recorder->page_size,
85 recorder->fd2, recorder->fd1);
86 /* Error on copying, then just keep fd1 */
87 if (ret) {
88 lseek64(recorder->fd1, 0, SEEK_END);
89 goto close;
90 }
91 lseek64(recorder->fd1, 0, SEEK_SET);
92 ftruncate(recorder->fd1, 0);
93 }
94 append_file(recorder->page_size, recorder->fd1, recorder->fd2);
95 }
96 close:
97 if (recorder->brass[0] >= 0)
98 close(recorder->brass[0]);
99
100 if (recorder->brass[1] >= 0)
101 close(recorder->brass[1]);
102
103 if (recorder->trace_fd >= 0)
104 close(recorder->trace_fd);
105
106 if (recorder->fd1 >= 0)
107 close(recorder->fd1);
108
109 if (recorder->fd2 >= 0)
110 close(recorder->fd2);
111
112 free(recorder);
113 }
114
set_nonblock(struct tracecmd_recorder * recorder)115 static void set_nonblock(struct tracecmd_recorder *recorder)
116 {
117 long flags;
118
119 /* Do not block on reads */
120 flags = fcntl(recorder->trace_fd, F_GETFL);
121 fcntl(recorder->trace_fd, F_SETFL, flags | O_NONBLOCK);
122
123 /* Do not block on streams */
124 recorder->fd_flags |= SPLICE_F_NONBLOCK;
125 }
126
127 struct tracecmd_recorder *
tracecmd_create_buffer_recorder_fd2(int fd,int fd2,int cpu,unsigned flags,const char * buffer,int maxkb)128 tracecmd_create_buffer_recorder_fd2(int fd, int fd2, int cpu, unsigned flags,
129 const char *buffer, int maxkb)
130 {
131 struct tracecmd_recorder *recorder;
132 char *path = NULL;
133 int pipe_size = 0;
134 int ret;
135
136 recorder = malloc(sizeof(*recorder));
137 if (!recorder)
138 return NULL;
139
140 recorder->cpu = cpu;
141 recorder->flags = flags;
142
143 recorder->fd_flags = SPLICE_F_MOVE;
144
145 if (!(recorder->flags & TRACECMD_RECORD_BLOCK_SPLICE))
146 recorder->fd_flags |= SPLICE_F_NONBLOCK;
147
148 recorder->trace_fd_flags = SPLICE_F_MOVE;
149
150 /* Init to know what to free and release */
151 recorder->trace_fd = -1;
152 recorder->brass[0] = -1;
153 recorder->brass[1] = -1;
154
155 recorder->page_size = getpagesize();
156 if (maxkb) {
157 int kb_per_page = recorder->page_size >> 10;
158
159 if (!kb_per_page)
160 kb_per_page = 1;
161 recorder->max = maxkb / kb_per_page;
162 /* keep max half */
163 recorder->max >>= 1;
164 if (!recorder->max)
165 recorder->max = 1;
166 } else
167 recorder->max = 0;
168
169 recorder->count = 0;
170 recorder->pages = 0;
171
172 /* fd always points to what to write to */
173 recorder->fd = fd;
174 recorder->fd1 = fd;
175 recorder->fd2 = fd2;
176
177 if (buffer) {
178 if (flags & TRACECMD_RECORD_SNAPSHOT)
179 ret = asprintf(&path, "%s/per_cpu/cpu%d/snapshot_raw",
180 buffer, cpu);
181 else
182 ret = asprintf(&path, "%s/per_cpu/cpu%d/trace_pipe_raw",
183 buffer, cpu);
184 if (ret < 0)
185 goto out_free;
186
187 recorder->trace_fd = open(path, O_RDONLY);
188 free(path);
189
190 if (recorder->trace_fd < 0)
191 goto out_free;
192 }
193
194 if (!(recorder->flags & (TRACECMD_RECORD_NOSPLICE |
195 TRACECMD_RECORD_NOBRASS))) {
196 ret = pipe(recorder->brass);
197 if (ret < 0)
198 goto out_free;
199
200 ret = fcntl(recorder->brass[0], F_GETPIPE_SZ, &pipe_size);
201 /*
202 * F_GETPIPE_SZ was introduced in 2.6.35, ftrace was introduced
203 * in 2.6.31. If we are running on an older kernel, just fall
204 * back to using page_size for splice(). It could also return
205 * success, but not modify pipe_size.
206 */
207 if (ret < 0 || !pipe_size)
208 pipe_size = recorder->page_size;
209
210 recorder->pipe_size = pipe_size;
211 }
212
213 if (recorder->flags & TRACECMD_RECORD_POLL)
214 set_nonblock(recorder);
215
216 return recorder;
217
218 out_free:
219 tracecmd_free_recorder(recorder);
220 return NULL;
221 }
222
/*
 * Create a recorder that writes to @fd only: no second output file and no
 * size limit.  See tracecmd_create_buffer_recorder_fd2() for the rest.
 */
struct tracecmd_recorder *
tracecmd_create_buffer_recorder_fd(int fd, int cpu, unsigned flags, const char *buffer)
{
	const int no_fd2 = -1;
	const int no_limit = 0;

	return tracecmd_create_buffer_recorder_fd2(fd, no_fd2, cpu, flags,
						   buffer, no_limit);
}
228
/*
 * Create (or truncate) @file and build a recorder writing to it.
 * If the recorder cannot be created, the file is closed and removed again.
 * Returns the recorder, or NULL on failure.
 */
static struct tracecmd_recorder *
__tracecmd_create_buffer_recorder(const char *file, int cpu, unsigned flags,
				  const char *buffer)
{
	struct tracecmd_recorder *rec;
	int out_fd;

	out_fd = open(file, O_WRONLY | O_CREAT | O_TRUNC | O_LARGEFILE, 0644);
	if (out_fd < 0)
		return NULL;

	rec = tracecmd_create_buffer_recorder_fd(out_fd, cpu, flags, buffer);
	if (rec)
		return rec;

	close(out_fd);
	unlink(file);

	return NULL;
}
248
/**
 * tracecmd_create_buffer_recorder_maxkb - create a size limited recorder
 * @file: output filename
 * @cpu: CPU to record
 * @flags: TRACECMD_RECORD_* flags
 * @buffer: tracing directory to read from
 * @maxkb: size limit in kilobytes; 0 falls back to
 *	tracecmd_create_buffer_recorder()
 *
 * A second file named "@file.1" is created as scratch space.  Its name is
 * removed right away, the data stays reachable through the open descriptor.
 *
 * Returns the recorder, or NULL on failure.  On failure @file is removed;
 * "@file.1" is only removed if this call managed to open it.
 */
struct tracecmd_recorder *
tracecmd_create_buffer_recorder_maxkb(const char *file, int cpu, unsigned flags,
				      const char *buffer, int maxkb)
{
	struct tracecmd_recorder *recorder = NULL;
	char *file2;
	size_t len;
	int fd;
	int fd2;

	if (!maxkb)
		return tracecmd_create_buffer_recorder(file, cpu, flags, buffer);

	/* sizeof(".1") already accounts for the terminating NUL */
	len = strlen(file) + sizeof(".1");
	file2 = malloc(len);
	if (!file2)
		return NULL;

	snprintf(file2, len, "%s.1", file);

	fd = open(file, O_RDWR | O_CREAT | O_TRUNC | O_LARGEFILE, 0644);
	if (fd < 0)
		goto out_free;

	fd2 = open(file2, O_RDWR | O_CREAT | O_TRUNC | O_LARGEFILE, 0644);
	if (fd2 < 0)
		goto out_close;

	recorder = tracecmd_create_buffer_recorder_fd2(fd, fd2, cpu, flags, buffer, maxkb);

	/* Unlink file2, we need to add everything to file at the end */
	unlink(file2);

	if (recorder)
		goto out_free;

	close(fd2);
 out_close:
	close(fd);
	unlink(file);
 out_free:
	free(file2);

	return recorder;
}
293
/*
 * Public entry point for creating a recorder that writes to @file and
 * reads from the tracing directory @buffer.  Returns NULL on failure.
 */
struct tracecmd_recorder *
tracecmd_create_buffer_recorder(const char *file, int cpu, unsigned flags,
				const char *buffer)
{
	struct tracecmd_recorder *rec;

	rec = __tracecmd_create_buffer_recorder(file, cpu, flags, buffer);

	return rec;
}
300
301 /**
302 * tracecmd_create_recorder_virt - Create a recorder reading tracing data
303 * from the trace_fd file descriptor instead of from the local tracefs
304 * @file: output filename where tracing data will be written
305 * @cpu: which CPU is being traced
306 * @flags: flags configuring the recorder (see TRACECMD_RECORDER_* enums)
307 * @trace_fd: file descriptor from where tracing data will be read
308 */
309 struct tracecmd_recorder *
tracecmd_create_recorder_virt(const char * file,int cpu,unsigned flags,int trace_fd)310 tracecmd_create_recorder_virt(const char *file, int cpu, unsigned flags,
311 int trace_fd)
312 {
313 struct tracecmd_recorder *recorder;
314
315 recorder = __tracecmd_create_buffer_recorder(file, cpu, flags, NULL);
316 if (recorder)
317 recorder->trace_fd = trace_fd;
318
319 return recorder;
320 }
321
/*
 * Create a recorder writing to @fd and reading from the directory
 * reported by tracefs_tracing_dir().  Returns NULL with errno set to
 * ENODEV when no tracing directory is reported.
 */
struct tracecmd_recorder *tracecmd_create_recorder_fd(int fd, int cpu, unsigned flags)
{
	const char *dir = tracefs_tracing_dir();

	if (dir)
		return tracecmd_create_buffer_recorder_fd(fd, cpu, flags, dir);

	errno = ENODEV;
	return NULL;
}
334
/*
 * Create a recorder writing to @file and reading from the directory
 * reported by tracefs_tracing_dir().  Returns NULL with errno set to
 * ENODEV when no tracing directory is reported.
 */
struct tracecmd_recorder *tracecmd_create_recorder(const char *file, int cpu, unsigned flags)
{
	const char *dir = tracefs_tracing_dir();

	if (dir)
		return tracecmd_create_buffer_recorder(file, cpu, flags, dir);

	errno = ENODEV;
	return NULL;
}
347
/*
 * Create a recorder limited to @maxkb kilobytes, writing to @file and
 * reading from the directory reported by tracefs_tracing_dir().  Returns
 * NULL with errno set to ENODEV when no tracing directory is reported.
 */
struct tracecmd_recorder *
tracecmd_create_recorder_maxkb(const char *file, int cpu, unsigned flags, int maxkb)
{
	const char *dir = tracefs_tracing_dir();

	if (dir)
		return tracecmd_create_buffer_recorder_maxkb(file, cpu, flags,
							     dir, maxkb);

	errno = ENODEV;
	return NULL;
}
361
update_fd(struct tracecmd_recorder * recorder,int size)362 static inline void update_fd(struct tracecmd_recorder *recorder, int size)
363 {
364 int fd;
365
366 if (!recorder->max)
367 return;
368
369 recorder->count += size;
370
371 if (recorder->count >= recorder->page_size) {
372 recorder->count = 0;
373 recorder->pages++;
374 }
375
376 if (recorder->pages < recorder->max)
377 return;
378
379 recorder->pages = 0;
380
381 fd = recorder->fd;
382
383 /* Swap fd to next file. */
384 if (fd == recorder->fd1)
385 fd = recorder->fd2;
386 else
387 fd = recorder->fd1;
388
389 /* Zero out the new file we are writing to */
390 lseek64(fd, 0, SEEK_SET);
391 ftruncate(fd, 0);
392
393 recorder->fd = fd;
394 }
395
396 /*
397 * Returns -1 on error.
398 * or bytes of data read.
399 */
splice_data(struct tracecmd_recorder * recorder)400 static long splice_data(struct tracecmd_recorder *recorder)
401 {
402 long total_read = 0;
403 long read;
404 long ret;
405
406 read = splice(recorder->trace_fd, NULL, recorder->brass[1], NULL,
407 recorder->pipe_size, recorder->trace_fd_flags);
408 if (read < 0) {
409 if (errno == EAGAIN || errno == EINTR || errno == ENOTCONN)
410 return 0;
411
412 tracecmd_warning("recorder error in splice input");
413 return -1;
414 } else if (read == 0)
415 return 0;
416
417 again:
418 ret = splice(recorder->brass[0], NULL, recorder->fd, NULL,
419 read, recorder->fd_flags);
420 if (ret < 0) {
421 if (errno != EAGAIN && errno != EINTR) {
422 tracecmd_warning("recorder error in splice output");
423 return -1;
424 }
425 return total_read;
426 } else
427 update_fd(recorder, ret);
428 total_read = ret;
429 read -= ret;
430 if (read)
431 goto again;
432
433 return total_read;
434 }
435
436 /*
437 * Returns -1 on error.
438 * or bytes of data read.
439 */
direct_splice_data(struct tracecmd_recorder * recorder)440 static long direct_splice_data(struct tracecmd_recorder *recorder)
441 {
442 struct pollfd pfd = {
443 .fd = recorder->trace_fd,
444 .events = POLLIN,
445 };
446 long read;
447 int ret;
448
449 /*
450 * splice(2) in Linux used to not check O_NONBLOCK flag of pipe file
451 * descriptors before [1]. To avoid getting blocked in the splice(2)
452 * call below after the user had requested to stop tracing, we poll(2)
453 * here. This poll() is not necessary on newer kernels.
454 *
455 * [1] https://github.com/torvalds/linux/commit/ee5e001196d1345b8fee25925ff5f1d67936081e
456 */
457 ret = poll(&pfd, 1, POLL_TIMEOUT_MS);
458 if (ret < 0)
459 return -1;
460
461 if (!(pfd.revents | POLLIN))
462 return 0;
463
464 read = splice(recorder->trace_fd, NULL, recorder->fd, NULL,
465 recorder->pipe_size, recorder->fd_flags);
466 if (read < 0) {
467 if (errno == EAGAIN || errno == EINTR || errno == ENOTCONN)
468 return 0;
469
470 tracecmd_warning("recorder error in splice input");
471 return -1;
472 }
473
474 return read;
475 }
476
477 /*
478 * Returns -1 on error.
479 * or bytes of data read.
480 */
read_data(struct tracecmd_recorder * recorder)481 static long read_data(struct tracecmd_recorder *recorder)
482 {
483 char buf[recorder->page_size];
484 long left;
485 long r, w;
486
487 r = read(recorder->trace_fd, buf, recorder->page_size);
488 if (r < 0) {
489 if (errno == EAGAIN || errno == EINTR || errno == ENOTCONN)
490 return 0;
491
492 tracecmd_warning("recorder error in read input");
493 return -1;
494 }
495
496 left = r;
497 do {
498 w = write(recorder->fd, buf + (r - left), left);
499 if (w > 0) {
500 left -= w;
501 update_fd(recorder, w);
502 }
503 } while (w >= 0 && left);
504
505 if (w < 0)
506 r = w;
507
508 return r;
509 }
510
move_data(struct tracecmd_recorder * recorder)511 static long move_data(struct tracecmd_recorder *recorder)
512 {
513 if (recorder->flags & TRACECMD_RECORD_NOSPLICE)
514 return read_data(recorder);
515
516 if (recorder->flags & TRACECMD_RECORD_NOBRASS)
517 return direct_splice_data(recorder);
518
519 return splice_data(recorder);
520 }
521
tracecmd_flush_recording(struct tracecmd_recorder * recorder)522 long tracecmd_flush_recording(struct tracecmd_recorder *recorder)
523 {
524 char buf[recorder->page_size];
525 long total = 0;
526 long wrote = 0;
527 long ret;
528
529 set_nonblock(recorder);
530
531 do {
532 ret = move_data(recorder);
533 if (ret < 0)
534 return ret;
535 total += ret;
536 } while (ret);
537
538 /* splice only reads full pages */
539 do {
540 ret = read(recorder->trace_fd, buf, recorder->page_size);
541 if (ret > 0) {
542 write(recorder->fd, buf, ret);
543 wrote += ret;
544 }
545
546 } while (ret > 0);
547
548 /* Make sure we finish off with a page size boundary */
549 wrote &= recorder->page_size - 1;
550 if (wrote) {
551 memset(buf, 0, recorder->page_size);
552 write(recorder->fd, buf, recorder->page_size - wrote);
553 total += recorder->page_size;
554 }
555
556 return total;
557 }
558
tracecmd_start_recording(struct tracecmd_recorder * recorder,unsigned long sleep)559 int tracecmd_start_recording(struct tracecmd_recorder *recorder, unsigned long sleep)
560 {
561 struct timespec req = {
562 .tv_sec = sleep / 1000000,
563 .tv_nsec = (sleep % 1000000) * 1000,
564 };
565 long read = 1;
566 long ret;
567
568 recorder->stop = 0;
569
570 do {
571 /* Only sleep if we did not read anything last time */
572 if (!read && sleep)
573 nanosleep(&req, NULL);
574
575 read = 0;
576 do {
577 ret = move_data(recorder);
578 if (ret < 0)
579 return ret;
580 read += ret;
581 } while (ret);
582 } while (!recorder->stop);
583
584 /* Flush out the rest */
585 ret = tracecmd_flush_recording(recorder);
586
587 if (ret < 0)
588 return ret;
589
590 return 0;
591 }
592
tracecmd_stop_recording(struct tracecmd_recorder * recorder)593 void tracecmd_stop_recording(struct tracecmd_recorder *recorder)
594 {
595 if (!recorder)
596 return;
597
598 set_nonblock(recorder);
599
600 recorder->stop = 1;
601 }
602