1 // SPDX-License-Identifier: LGPL-2.1
2 /*
3 * Copyright (C) 2022 Google Inc, Steven Rostedt <rostedt@goodmis.org>
4 */
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <dirent.h>
8 #include <unistd.h>
9 #include <fcntl.h>
10 #include <limits.h>
11 #include <errno.h>
12 #include <sys/stat.h>
13 #include <sys/ioctl.h>
14 #include <sys/select.h>
15
16 #include <kbuffer.h>
17
18 #include "tracefs.h"
19 #include "tracefs-local.h"
20
/* State bits stored in the flags member of struct tracefs_cpu */
enum {
	TC_STOP			= 0x1,	/* Stop reading */
	TC_PERM_NONBLOCK	= 0x2,	/* read is always non blocking */
	TC_NONBLOCK		= 0x4,	/* read is non blocking */
};
26
/*
 * Reader state for one raw trace descriptor. Instances are created by
 * tracefs_cpu_alloc_fd() and released by tracefs_cpu_free_fd().
 */
struct tracefs_cpu {
	int	fd;			/* descriptor the trace data is read from */
	int	flags;			/* TC_* state bits */
	int	nfds;			/* first argument given to select() */
	int	ctrl_pipe[2];		/* written to by tracefs_cpu_stop() to end a blocked wait */
	int	splice_pipe[2];		/* created on demand by init_splice(), -1 until then */
	int	pipe_size;		/* length requested from splice() */
	int	subbuf_size;		/* length requested from read() */
	int	buffered;		/* bytes spliced into splice_pipe and not yet read back */
	int	splice_read_flags;	/* SPLICE_F_* bits computed by init_splice() */
};
38
39 /**
40 * tracefs_cpu_alloc_fd - create a tracefs_cpu instance for an existing fd
41 * @fd: The file descriptor to attach the tracefs_cpu to
42 * @subbuf_size: The expected size to read the subbuffer with
43 * @nonblock: If true, the file will be opened in O_NONBLOCK mode
44 *
45 * Return a descriptor that can read the tracefs trace_pipe_raw file
46 * that is associated with the given @fd and must be read in @subbuf_size.
47 *
48 * Returns NULL on error.
49 */
50 struct tracefs_cpu *
tracefs_cpu_alloc_fd(int fd,int subbuf_size,bool nonblock)51 tracefs_cpu_alloc_fd(int fd, int subbuf_size, bool nonblock)
52 {
53 struct tracefs_cpu *tcpu;
54 int mode = O_RDONLY;
55 int ret;
56
57 tcpu = calloc(1, sizeof(*tcpu));
58 if (!tcpu)
59 return NULL;
60
61 if (nonblock) {
62 mode |= O_NONBLOCK;
63 tcpu->flags |= TC_NONBLOCK | TC_PERM_NONBLOCK;
64 }
65
66 tcpu->splice_pipe[0] = -1;
67 tcpu->splice_pipe[1] = -1;
68
69 tcpu->fd = fd;
70
71 tcpu->subbuf_size = subbuf_size;
72
73 if (tcpu->flags & TC_PERM_NONBLOCK) {
74 tcpu->ctrl_pipe[0] = -1;
75 tcpu->ctrl_pipe[1] = -1;
76 } else {
77 /* ctrl_pipe is used to break out of blocked reads */
78 ret = pipe(tcpu->ctrl_pipe);
79 if (ret < 0)
80 goto fail;
81 if (tcpu->ctrl_pipe[0] > tcpu->fd)
82 tcpu->nfds = tcpu->ctrl_pipe[0] + 1;
83 else
84 tcpu->nfds = tcpu->fd + 1;
85 }
86
87 return tcpu;
88 fail:
89 free(tcpu);
90 return NULL;
91 }
92
/**
 * tracefs_cpu_open - open an instance raw trace file
 * @instance: the instance (NULL for toplevel) of the cpu raw file to open
 * @cpu: The CPU that the raw trace file is associated with
 * @nonblock: If true, the file will be opened in O_NONBLOCK mode
 *
 * Return a descriptor that can read the tracefs trace_pipe_raw file
 * for a given @cpu in a given @instance.
 *
 * The sub buffer size is taken from the events/header_page file, which
 * is read with a NULL instance.
 *
 * Returns NULL on error.
 */
struct tracefs_cpu *
tracefs_cpu_open(struct tracefs_instance *instance, int cpu, bool nonblock)
{
	struct tracefs_cpu *tcpu;
	struct tep_handle *tep;
	char path[128];
	char *buf;
	int mode = O_RDONLY;
	int subbuf_size;
	int len;
	int ret;
	int fd;

	if (nonblock)
		mode |= O_NONBLOCK;

	/* Bounded formatting, the path can never run past the buffer */
	snprintf(path, sizeof(path), "per_cpu/cpu%d/trace_pipe_raw", cpu);

	fd = tracefs_instance_file_open(instance, path, mode);
	if (fd < 0)
		return NULL;

	tep = tep_alloc();
	if (!tep)
		goto fail;

	/* Get the size of the page */
	buf = tracefs_instance_file_read(NULL, "events/header_page", &len);
	if (!buf)
		goto fail;

	ret = tep_parse_header_page(tep, buf, len, sizeof(long));
	free(buf);
	if (ret < 0)
		goto fail;

	subbuf_size = tep_get_sub_buffer_size(tep);

	/* The handle was only needed to parse the header page */
	tep_free(tep);
	tep = NULL;

	tcpu = tracefs_cpu_alloc_fd(fd, subbuf_size, nonblock);
	if (!tcpu)
		goto fail;

	return tcpu;

 fail:
	tep_free(tep);
	close(fd);
	return NULL;
}
154
/* Close @fd, treating a negative value as "nothing to close" */
static void close_fd(int fd)
{
	if (fd >= 0)
		close(fd);
}
161
162 /**
163 * tracefs_cpu_free_fd - clean up the tracefs_cpu descriptor
164 * @tcpu: The descriptor created with tracefs_cpu_alloc_fd()
165 *
166 * Closes all the internal file descriptors that were opened by
167 * tracefs_cpu_alloc_fd(), and frees the descriptor.
168 */
tracefs_cpu_free_fd(struct tracefs_cpu * tcpu)169 void tracefs_cpu_free_fd(struct tracefs_cpu *tcpu)
170 {
171 close_fd(tcpu->ctrl_pipe[0]);
172 close_fd(tcpu->ctrl_pipe[1]);
173 close_fd(tcpu->splice_pipe[0]);
174 close_fd(tcpu->splice_pipe[1]);
175
176 free(tcpu);
177 }
178
179 /**
180 * tracefs_cpu_close - clean up and close a raw trace descriptor
181 * @tcpu: The descriptor created with tracefs_cpu_open()
182 *
183 * Closes all the file descriptors associated to the trace_pipe_raw
184 * opened by tracefs_cpu_open().
185 */
tracefs_cpu_close(struct tracefs_cpu * tcpu)186 void tracefs_cpu_close(struct tracefs_cpu *tcpu)
187 {
188 if (!tcpu)
189 return;
190
191 close(tcpu->fd);
192 tracefs_cpu_free_fd(tcpu);
193 }
194
195 /**
196 * tracefs_cpu_read_size - Return the size of the sub buffer
197 * @tcpu: The descriptor that holds the size of the sub buffer
198 *
199 * A lot of the functions that read the data from the trace_pipe_raw
200 * expect the caller to have allocated enough space to store a full
201 * subbuffer. Calling this function is a requirement to do so.
202 */
tracefs_cpu_read_size(struct tracefs_cpu * tcpu)203 int tracefs_cpu_read_size(struct tracefs_cpu *tcpu)
204 {
205 if (!tcpu)
206 return -1;
207 return tcpu->subbuf_size;
208 }
209
set_nonblock(struct tracefs_cpu * tcpu)210 static void set_nonblock(struct tracefs_cpu *tcpu)
211 {
212 long flags;
213
214 if (tcpu->flags & TC_NONBLOCK)
215 return;
216
217 flags = fcntl(tcpu->fd, F_GETFL);
218 fcntl(tcpu->fd, F_SETFL, flags | O_NONBLOCK);
219 tcpu->flags |= TC_NONBLOCK;
220 }
221
unset_nonblock(struct tracefs_cpu * tcpu)222 static void unset_nonblock(struct tracefs_cpu *tcpu)
223 {
224 long flags;
225
226 if (!(tcpu->flags & TC_NONBLOCK))
227 return;
228
229 flags = fcntl(tcpu->fd, F_GETFL);
230 flags &= ~O_NONBLOCK;
231 fcntl(tcpu->fd, F_SETFL, flags);
232 tcpu->flags &= ~TC_NONBLOCK;
233 }
234
235 /*
236 * If set to blocking mode, block until the watermark has been
237 * reached, or the control has said to stop. If the contol is
238 * set, then nonblock will be set to true on the way out.
239 */
wait_on_input(struct tracefs_cpu * tcpu,bool nonblock)240 static int wait_on_input(struct tracefs_cpu *tcpu, bool nonblock)
241 {
242 fd_set rfds;
243 int ret;
244
245 if (tcpu->flags & TC_PERM_NONBLOCK)
246 return 1;
247
248 if (nonblock) {
249 set_nonblock(tcpu);
250 return 1;
251 } else {
252 unset_nonblock(tcpu);
253 }
254
255 FD_ZERO(&rfds);
256 FD_SET(tcpu->fd, &rfds);
257 FD_SET(tcpu->ctrl_pipe[0], &rfds);
258
259 ret = select(tcpu->nfds, &rfds, NULL, NULL, NULL);
260
261 /* Let the application decide what to do with signals and such */
262 if (ret < 0)
263 return ret;
264
265 if (FD_ISSET(tcpu->ctrl_pipe[0], &rfds)) {
266 /* Flush the ctrl pipe */
267 read(tcpu->ctrl_pipe[0], &ret, 1);
268
269 /* Make nonblock as it is now stopped */
270 set_nonblock(tcpu);
271 /* Permanently set unblock */
272 tcpu->flags |= TC_PERM_NONBLOCK;
273 }
274
275 return FD_ISSET(tcpu->fd, &rfds);
276 }
277
278 /**
279 * tracefs_cpu_read - read from the raw trace file
280 * @tcpu: The descriptor representing the raw trace file
281 * @buffer: Where to read into (must be at least the size of the subbuffer)
282 * @nonblock: Hint to not block on the read if there's no data.
283 *
284 * Reads the trace_pipe_raw files associated to @tcpu into @buffer.
285 * @buffer must be at least the size of the sub buffer of the ring buffer,
286 * which is returned by tracefs_cpu_read_size().
287 *
288 * If @nonblock is set, and there's no data available, it will return
289 * immediately. Otherwise depending on how @tcpu was opened, it will
290 * block. If @tcpu was opened with nonblock set, then this @nonblock
291 * will make no difference.
292 *
293 * Returns the amount read or -1 on error.
294 */
tracefs_cpu_read(struct tracefs_cpu * tcpu,void * buffer,bool nonblock)295 int tracefs_cpu_read(struct tracefs_cpu *tcpu, void *buffer, bool nonblock)
296 {
297 int ret;
298
299 /*
300 * If nonblock is set, then the wait_on_input() will return
301 * immediately, if there's nothing in the buffer, with
302 * ret == 0.
303 */
304 ret = wait_on_input(tcpu, nonblock);
305 if (ret <= 0)
306 return ret;
307
308 ret = read(tcpu->fd, buffer, tcpu->subbuf_size);
309
310 /* It's OK if there's no data to read */
311 if (ret < 0 && errno == EAGAIN) {
312 /* Reset errno */
313 errno = 0;
314 ret = 0;
315 }
316
317 return ret;
318 }
319
init_splice(struct tracefs_cpu * tcpu)320 static int init_splice(struct tracefs_cpu *tcpu)
321 {
322 int ret;
323
324 if (tcpu->splice_pipe[0] >= 0)
325 return 0;
326
327 ret = pipe(tcpu->splice_pipe);
328 if (ret < 0)
329 return ret;
330
331 ret = fcntl(tcpu->splice_pipe[0], F_GETPIPE_SZ, &tcpu->pipe_size);
332 /*
333 * F_GETPIPE_SZ was introduced in 2.6.35, ftrace was introduced
334 * in 2.6.31. If we are running on an older kernel, just fall
335 * back to using subbuf_size for splice(). It could also return
336 * the size of the pipe and not set pipe_size.
337 */
338 if (ret > 0 && !tcpu->pipe_size)
339 tcpu->pipe_size = ret;
340 else if (ret < 0)
341 tcpu->pipe_size = tcpu->subbuf_size;
342
343 tcpu->splice_read_flags = SPLICE_F_MOVE;
344 if (tcpu->flags & TC_NONBLOCK)
345 tcpu->splice_read_flags |= SPLICE_F_NONBLOCK;
346
347 return 0;
348 }
349
350 /**
351 * tracefs_cpu_buffered_read - Read the raw trace data buffering through a pipe
352 * @tcpu: The descriptor representing the raw trace file
353 * @buffer: Where to read into (must be at least the size of the subbuffer)
354 * @nonblock: Hint to not block on the read if there's no data.
355 *
356 * This is basically the same as tracefs_cpu_read() except that it uses
357 * a pipe through splice to buffer reads. This will batch reads keeping
358 * the reading from the ring buffer less intrusive to the system, as
359 * just reading all the time can cause quite a disturbance.
360 *
361 * Note, one difference between this and tracefs_cpu_read() is that it
362 * will read only in sub buffer pages. If the ring buffer has not filled
363 * a page, then it will not return anything, even with @nonblock set.
364 * Calls to tracefs_cpu_flush() should be done to read the rest of
365 * the file at the end of the trace.
366 *
367 * Returns the amount read or -1 on error.
368 */
tracefs_cpu_buffered_read(struct tracefs_cpu * tcpu,void * buffer,bool nonblock)369 int tracefs_cpu_buffered_read(struct tracefs_cpu *tcpu, void *buffer, bool nonblock)
370 {
371 int mode = SPLICE_F_MOVE;
372 int ret;
373
374 if (tcpu->buffered < 0)
375 tcpu->buffered = 0;
376
377 if (tcpu->buffered)
378 goto do_read;
379
380 ret = wait_on_input(tcpu, nonblock);
381 if (ret <= 0)
382 return ret;
383
384 if (tcpu->flags & TC_NONBLOCK)
385 mode |= SPLICE_F_NONBLOCK;
386
387 ret = init_splice(tcpu);
388 if (ret < 0)
389 return ret;
390
391 ret = splice(tcpu->fd, NULL, tcpu->splice_pipe[1], NULL,
392 tcpu->pipe_size, mode);
393 if (ret <= 0)
394 return ret;
395
396 tcpu->buffered = ret;
397
398 do_read:
399 ret = read(tcpu->splice_pipe[0], buffer, tcpu->subbuf_size);
400 if (ret > 0)
401 tcpu->buffered -= ret;
402 return ret;
403 }
404
405 /**
406 * tracefs_cpu_stop - Stop a blocked read of the raw tracing file
407 * @tcpu: The descriptor representing the raw trace file
408 *
409 * This will attempt to unblock a task blocked on @tcpu reading it.
410 * On older kernels, it may not do anything for the pipe reads, as
411 * older kernels do not wake up tasks waiting on the ring buffer.
412 *
413 * Returns 0 if the tasks reading the raw tracing file does not
414 * need a nudge.
415 *
416 * Returns 1 if that tasks may need a nudge (send a signal).
417 *
418 * Returns negative on error.
419 */
tracefs_cpu_stop(struct tracefs_cpu * tcpu)420 int tracefs_cpu_stop(struct tracefs_cpu *tcpu)
421 {
422 int ret = 1;
423
424 if (tcpu->flags & TC_PERM_NONBLOCK)
425 return 0;
426
427 ret = write(tcpu->ctrl_pipe[1], &ret, 1);
428 if (ret < 0)
429 return ret;
430
431 /* Calling ioctl() on recent kernels will wake up the waiters */
432 ret = ioctl(tcpu->fd, 0);
433 if (ret < 0)
434 ret = 1;
435 else
436 ret = 0;
437
438 set_nonblock(tcpu);
439
440 return ret;
441 }
442
443 /**
444 * tracefs_cpu_flush - Finish out and read the rest of the raw tracing file
445 * @tcpu: The descriptor representing the raw trace file
446 * @buffer: Where to read into (must be at least the size of the subbuffer)
447 *
448 * Reads the trace_pipe_raw file associated by the @tcpu and puts it
449 * into @buffer, which must be the size of the sub buffer which is retrieved.
450 * by tracefs_cpu_read_size(). This should be called at the end of tracing
451 * to get the rest of the data.
452 *
453 * This will set the file descriptor for reading to non-blocking mode.
454 *
455 * Returns the number of bytes read, or negative on error.
456 */
tracefs_cpu_flush(struct tracefs_cpu * tcpu,void * buffer)457 int tracefs_cpu_flush(struct tracefs_cpu *tcpu, void *buffer)
458 {
459 int ret;
460
461 /* Make sure that reading is now non blocking */
462 set_nonblock(tcpu);
463
464 if (tcpu->buffered < 0)
465 tcpu->buffered = 0;
466
467 if (tcpu->buffered) {
468 ret = read(tcpu->splice_pipe[0], buffer, tcpu->subbuf_size);
469 if (ret > 0)
470 tcpu->buffered -= ret;
471 return ret;
472 }
473
474 ret = read(tcpu->fd, buffer, tcpu->subbuf_size);
475 if (ret > 0 && tcpu->buffered)
476 tcpu->buffered -= ret;
477
478 /* It's OK if there's no data to read */
479 if (ret < 0 && errno == EAGAIN) {
480 /* Reset errno */
481 errno = 0;
482 ret = 0;
483 }
484
485 return ret;
486 }
487
488 /**
489 * tracefs_cpu_flush_write - Finish out and read the rest of the raw tracing file
490 * @tcpu: The descriptor representing the raw trace file
491 * @wfd: The write file descriptor to write the data to
492 *
493 * Reads the trace_pipe_raw file associated by the @tcpu and writes it to
494 * @wfd. This should be called at the end of tracing to get the rest of the data.
495 *
496 * Returns the number of bytes written, or negative on error.
497 */
tracefs_cpu_flush_write(struct tracefs_cpu * tcpu,int wfd)498 int tracefs_cpu_flush_write(struct tracefs_cpu *tcpu, int wfd)
499 {
500 char buffer[tcpu->subbuf_size];
501 int ret;
502
503 ret = tracefs_cpu_flush(tcpu, buffer);
504 if (ret > 0)
505 ret = write(wfd, buffer, ret);
506
507 /* It's OK if there's no data to read */
508 if (ret < 0 && errno == EAGAIN)
509 ret = 0;
510
511 return ret;
512 }
513
514 /**
515 * tracefs_cpu_write - Write the raw trace file into a file descriptor
516 * @tcpu: The descriptor representing the raw trace file
517 * @wfd: The write file descriptor to write the data to
518 * @nonblock: Hint to not block on the read if there's no data.
519 *
520 * This will pipe the data from the trace_pipe_raw file associated with @tcpu
521 * into the @wfd file descriptor. If @nonblock is set, then it will not
522 * block on if there's nothing to write. Note, it will only write sub buffer
523 * size data to @wfd. Calls to tracefs_cpu_flush_write() are needed to
524 * write out the rest.
525 *
526 * Returns the number of bytes read or negative on error.
527 */
tracefs_cpu_write(struct tracefs_cpu * tcpu,int wfd,bool nonblock)528 int tracefs_cpu_write(struct tracefs_cpu *tcpu, int wfd, bool nonblock)
529 {
530 char buffer[tcpu->subbuf_size];
531 int mode = SPLICE_F_MOVE;
532 int tot_write = 0;
533 int tot;
534 int ret;
535
536 ret = wait_on_input(tcpu, nonblock);
537 if (ret <= 0)
538 return ret;
539
540 if (tcpu->flags & TC_NONBLOCK)
541 mode |= SPLICE_F_NONBLOCK;
542
543 ret = init_splice(tcpu);
544 if (ret < 0)
545 return ret;
546
547 tot = splice(tcpu->fd, NULL, tcpu->splice_pipe[1], NULL,
548 tcpu->pipe_size, mode);
549 if (tot < 0)
550 return tot;
551
552 if (tot == 0)
553 return 0;
554
555 ret = splice(tcpu->splice_pipe[0], NULL, wfd, NULL,
556 tot, SPLICE_F_MOVE | SPLICE_F_NONBLOCK);
557
558 if (ret >= 0)
559 return ret;
560
561 /* Some file systems do not allow splicing, try writing instead */
562 do {
563 int r = tcpu->subbuf_size;
564
565 if (r > tot)
566 r = tot;
567
568 ret = read(tcpu->splice_pipe[0], buffer, r);
569 if (ret > 0) {
570 tot -= ret;
571 ret = write(wfd, buffer, ret);
572 }
573 if (ret > 0)
574 tot_write += ret;
575 } while (ret > 0);
576
577 if (ret < 0)
578 return ret;
579
580 return tot_write;
581 }
582
583 /**
584 * tracefs_cpu_pipe - Write the raw trace file into a pipe descriptor
585 * @tcpu: The descriptor representing the raw trace file
586 * @wfd: The write file descriptor to write the data to (must be a pipe)
587 * @nonblock: Hint to not block on the read if there's no data.
588 *
589 * This will splice directly the file descriptor of the trace_pipe_raw
590 * file to the given @wfd, which must be a pipe. This can also be used
591 * if @tcpu was created with tracefs_cpu_create_fd() where the passed
592 * in @fd there was a pipe, then @wfd does not need to be a pipe.
593 *
594 * Returns the number of bytes read or negative on error.
595 */
tracefs_cpu_pipe(struct tracefs_cpu * tcpu,int wfd,bool nonblock)596 int tracefs_cpu_pipe(struct tracefs_cpu *tcpu, int wfd, bool nonblock)
597 {
598 int mode = SPLICE_F_MOVE;
599 int ret;
600
601 ret = wait_on_input(tcpu, nonblock);
602 if (ret <= 0)
603 return ret;
604
605 if (tcpu->flags & TC_NONBLOCK)
606 mode |= SPLICE_F_NONBLOCK;
607
608 ret = splice(tcpu->fd, NULL, wfd, NULL,
609 tcpu->pipe_size, mode);
610 return ret;
611 }
612