• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: LGPL-2.1
2 /*
3  * Copyright (C) 2022 Google Inc, Steven Rostedt <rostedt@goodmis.org>
4  */
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <dirent.h>
8 #include <unistd.h>
9 #include <fcntl.h>
10 #include <limits.h>
11 #include <errno.h>
12 #include <sys/stat.h>
13 #include <sys/ioctl.h>
14 #include <sys/select.h>
15 
16 #include <kbuffer.h>
17 
18 #include "tracefs.h"
19 #include "tracefs-local.h"
20 
/*
 * Bit flags kept in the flags field of struct tracefs_cpu.
 *
 * TC_PERM_NONBLOCK is set together with TC_NONBLOCK when the descriptor
 * is allocated with nonblock, and is also set once the control pipe has
 * fired in wait_on_input(). TC_NONBLOCK tracks whether O_NONBLOCK has
 * been applied by set_nonblock() / unset_nonblock().
 * NOTE(review): TC_STOP is not referenced in the code visible here.
 */
enum {
	TC_STOP			= 1 << 0,   /* Stop reading */
	TC_PERM_NONBLOCK	= 1 << 1,   /* read is always non blocking */
	TC_NONBLOCK		= 1 << 2,   /* read is non blocking */
};
26 
/* State for reading one raw per-CPU trace file descriptor. */
struct tracefs_cpu {
	int		fd;			/* descriptor that is read / spliced from */
	int		flags;			/* TC_* bits */
	int		nfds;			/* first argument for select() in wait_on_input() */
	int		ctrl_pipe[2];		/* used to break out of a blocked select(); -1 if unused */
	int		splice_pipe[2];		/* intermediate pipe for splice(); -1 until init_splice() */
	int		pipe_size;		/* length handed to splice(); set by init_splice() */
	int		subbuf_size;		/* number of bytes requested per read() */
	int		buffered;		/* bytes spliced into splice_pipe and not yet read out */
	int		splice_read_flags;	/* set by init_splice(); not read in the code visible here */
};
38 
39 /**
40  * tracefs_cpu_alloc_fd - create a tracefs_cpu instance for an existing fd
41  * @fd: The file descriptor to attach the tracefs_cpu to
42  * @subbuf_size: The expected size to read the subbuffer with
43  * @nonblock: If true, the file will be opened in O_NONBLOCK mode
44  *
45  * Return a descriptor that can read the tracefs trace_pipe_raw file
46  * that is associated with the given @fd and must be read in @subbuf_size.
47  *
48  * Returns NULL on error.
49  */
50 struct tracefs_cpu *
tracefs_cpu_alloc_fd(int fd,int subbuf_size,bool nonblock)51 tracefs_cpu_alloc_fd(int fd, int subbuf_size, bool nonblock)
52 {
53 	struct tracefs_cpu *tcpu;
54 	int mode = O_RDONLY;
55 	int ret;
56 
57 	tcpu = calloc(1, sizeof(*tcpu));
58 	if (!tcpu)
59 		return NULL;
60 
61 	if (nonblock) {
62 		mode |= O_NONBLOCK;
63 		tcpu->flags |= TC_NONBLOCK | TC_PERM_NONBLOCK;
64 	}
65 
66 	tcpu->splice_pipe[0] = -1;
67 	tcpu->splice_pipe[1] = -1;
68 
69 	tcpu->fd = fd;
70 
71 	tcpu->subbuf_size = subbuf_size;
72 
73 	if (tcpu->flags & TC_PERM_NONBLOCK) {
74 		tcpu->ctrl_pipe[0] = -1;
75 		tcpu->ctrl_pipe[1] = -1;
76 	} else {
77 		/* ctrl_pipe is used to break out of blocked reads */
78 		ret = pipe(tcpu->ctrl_pipe);
79 		if (ret < 0)
80 			goto fail;
81 		if (tcpu->ctrl_pipe[0] > tcpu->fd)
82 			tcpu->nfds = tcpu->ctrl_pipe[0] + 1;
83 		else
84 			tcpu->nfds = tcpu->fd + 1;
85 	}
86 
87 	return tcpu;
88  fail:
89 	free(tcpu);
90 	return NULL;
91 }
92 
/**
 * tracefs_cpu_open - open an instance raw trace file
 * @instance: the instance (NULL for toplevel) of the cpu raw file to open
 * @cpu: The CPU that the raw trace file is associated with
 * @nonblock: If true, the file will be opened in O_NONBLOCK mode
 *
 * Return a descriptor that can read the tracefs trace_pipe_raw file
 * for a given @cpu in a given @instance.
 *
 * Returns NULL on error.
 */
struct tracefs_cpu *
tracefs_cpu_open(struct tracefs_instance *instance, int cpu, bool nonblock)
{
	struct tracefs_cpu *tcpu;
	struct tep_handle *tep;
	char path[128];
	char *buf;
	int mode = O_RDONLY;
	int subbuf_size;
	int len;
	int ret;
	int fd;

	if (nonblock)
		mode |= O_NONBLOCK;

	/* Bounded formatting: never write past the end of path[] */
	snprintf(path, sizeof(path), "per_cpu/cpu%d/trace_pipe_raw", cpu);

	fd = tracefs_instance_file_open(instance, path, mode);
	if (fd < 0)
		return NULL;

	tep = tep_alloc();
	if (!tep)
		goto fail;

	/* Get the size of the page (read from the top level instance) */
	buf = tracefs_instance_file_read(NULL, "events/header_page", &len);
	if (!buf)
		goto fail;

	ret = tep_parse_header_page(tep, buf, len, sizeof(long));
	free(buf);
	if (ret < 0)
		goto fail;

	subbuf_size = tep_get_sub_buffer_size(tep);

	/* The tep handle was only needed to parse the header page */
	tep_free(tep);
	tep = NULL;

	tcpu = tracefs_cpu_alloc_fd(fd, subbuf_size, nonblock);
	if (!tcpu)
		goto fail;

	return tcpu;
 fail:
	tep_free(tep);
	close(fd);
	return NULL;
}
154 
/* Close @fd, silently skipping negative (never opened) descriptors. */
static void close_fd(int fd)
{
	if (fd >= 0)
		close(fd);
}
161 
162 /**
163  * tracefs_cpu_free_fd - clean up the tracefs_cpu descriptor
164  * @tcpu: The descriptor created with tracefs_cpu_alloc_fd()
165  *
166  * Closes all the internal file descriptors that were opened by
167  * tracefs_cpu_alloc_fd(), and frees the descriptor.
168  */
tracefs_cpu_free_fd(struct tracefs_cpu * tcpu)169 void tracefs_cpu_free_fd(struct tracefs_cpu *tcpu)
170 {
171 	close_fd(tcpu->ctrl_pipe[0]);
172 	close_fd(tcpu->ctrl_pipe[1]);
173 	close_fd(tcpu->splice_pipe[0]);
174 	close_fd(tcpu->splice_pipe[1]);
175 
176 	free(tcpu);
177 }
178 
179 /**
180  * tracefs_cpu_close - clean up and close a raw trace descriptor
181  * @tcpu: The descriptor created with tracefs_cpu_open()
182  *
183  * Closes all the file descriptors associated to the trace_pipe_raw
184  * opened by tracefs_cpu_open().
185  */
tracefs_cpu_close(struct tracefs_cpu * tcpu)186 void tracefs_cpu_close(struct tracefs_cpu *tcpu)
187 {
188 	if (!tcpu)
189 		return;
190 
191 	close(tcpu->fd);
192 	tracefs_cpu_free_fd(tcpu);
193 }
194 
195 /**
196  * tracefs_cpu_read_size - Return the size of the sub buffer
197  * @tcpu: The descriptor that holds the size of the sub buffer
198  *
199  * A lot of the functions that read the data from the trace_pipe_raw
200  * expect the caller to have allocated enough space to store a full
201  * subbuffer. Calling this function is a requirement to do so.
202  */
tracefs_cpu_read_size(struct tracefs_cpu * tcpu)203 int tracefs_cpu_read_size(struct tracefs_cpu *tcpu)
204 {
205 	if (!tcpu)
206 		return -1;
207 	return tcpu->subbuf_size;
208 }
209 
set_nonblock(struct tracefs_cpu * tcpu)210 static void set_nonblock(struct tracefs_cpu *tcpu)
211 {
212 	long flags;
213 
214 	if (tcpu->flags & TC_NONBLOCK)
215 		return;
216 
217 	flags = fcntl(tcpu->fd, F_GETFL);
218 	fcntl(tcpu->fd, F_SETFL, flags | O_NONBLOCK);
219 	tcpu->flags |= TC_NONBLOCK;
220 }
221 
unset_nonblock(struct tracefs_cpu * tcpu)222 static void unset_nonblock(struct tracefs_cpu *tcpu)
223 {
224 	long flags;
225 
226 	if (!(tcpu->flags & TC_NONBLOCK))
227 		return;
228 
229 	flags = fcntl(tcpu->fd, F_GETFL);
230 	flags &= ~O_NONBLOCK;
231 	fcntl(tcpu->fd, F_SETFL, flags);
232 	tcpu->flags &= ~TC_NONBLOCK;
233 }
234 
235 /*
236  * If set to blocking mode, block until the watermark has been
237  * reached, or the control has said to stop. If the contol is
238  * set, then nonblock will be set to true on the way out.
239  */
wait_on_input(struct tracefs_cpu * tcpu,bool nonblock)240 static int wait_on_input(struct tracefs_cpu *tcpu, bool nonblock)
241 {
242 	fd_set rfds;
243 	int ret;
244 
245 	if (tcpu->flags & TC_PERM_NONBLOCK)
246 		return 1;
247 
248 	if (nonblock) {
249 		set_nonblock(tcpu);
250 		return 1;
251 	} else {
252 		unset_nonblock(tcpu);
253 	}
254 
255 	FD_ZERO(&rfds);
256 	FD_SET(tcpu->fd, &rfds);
257 	FD_SET(tcpu->ctrl_pipe[0], &rfds);
258 
259 	ret = select(tcpu->nfds, &rfds, NULL, NULL, NULL);
260 
261 	/* Let the application decide what to do with signals and such */
262 	if (ret < 0)
263 		return ret;
264 
265 	if (FD_ISSET(tcpu->ctrl_pipe[0], &rfds)) {
266 		/* Flush the ctrl pipe */
267 		read(tcpu->ctrl_pipe[0], &ret, 1);
268 
269 		/* Make nonblock as it is now stopped */
270 		set_nonblock(tcpu);
271 		/* Permanently set unblock */
272 		tcpu->flags |= TC_PERM_NONBLOCK;
273 	}
274 
275 	return FD_ISSET(tcpu->fd, &rfds);
276 }
277 
278 /**
279  * tracefs_cpu_read - read from the raw trace file
280  * @tcpu: The descriptor representing the raw trace file
281  * @buffer: Where to read into (must be at least the size of the subbuffer)
282  * @nonblock: Hint to not block on the read if there's no data.
283  *
284  * Reads the trace_pipe_raw files associated to @tcpu into @buffer.
285  * @buffer must be at least the size of the sub buffer of the ring buffer,
286  * which is returned by tracefs_cpu_read_size().
287  *
288  * If @nonblock is set, and there's no data available, it will return
289  * immediately. Otherwise depending on how @tcpu was opened, it will
290  * block. If @tcpu was opened with nonblock set, then this @nonblock
291  * will make no difference.
292  *
293  * Returns the amount read or -1 on error.
294  */
tracefs_cpu_read(struct tracefs_cpu * tcpu,void * buffer,bool nonblock)295 int tracefs_cpu_read(struct tracefs_cpu *tcpu, void *buffer, bool nonblock)
296 {
297 	int ret;
298 
299 	/*
300 	 * If nonblock is set, then the wait_on_input() will return
301 	 * immediately, if there's nothing in the buffer, with
302 	 * ret == 0.
303 	 */
304 	ret = wait_on_input(tcpu, nonblock);
305 	if (ret <= 0)
306 		return ret;
307 
308 	ret = read(tcpu->fd, buffer, tcpu->subbuf_size);
309 
310 	/* It's OK if there's no data to read */
311 	if (ret < 0 && errno == EAGAIN) {
312 		/* Reset errno */
313 		errno = 0;
314 		ret = 0;
315 	}
316 
317 	return ret;
318 }
319 
init_splice(struct tracefs_cpu * tcpu)320 static int init_splice(struct tracefs_cpu *tcpu)
321 {
322 	int ret;
323 
324 	if (tcpu->splice_pipe[0] >= 0)
325 		return 0;
326 
327 	ret = pipe(tcpu->splice_pipe);
328 	if (ret < 0)
329 		return ret;
330 
331 	ret = fcntl(tcpu->splice_pipe[0], F_GETPIPE_SZ, &tcpu->pipe_size);
332 	/*
333 	 * F_GETPIPE_SZ was introduced in 2.6.35, ftrace was introduced
334 	 * in 2.6.31. If we are running on an older kernel, just fall
335 	 * back to using subbuf_size for splice(). It could also return
336 	 * the size of the pipe and not set pipe_size.
337 	 */
338 	if (ret > 0 && !tcpu->pipe_size)
339 		tcpu->pipe_size = ret;
340 	else if (ret < 0)
341 		tcpu->pipe_size = tcpu->subbuf_size;
342 
343 	tcpu->splice_read_flags = SPLICE_F_MOVE;
344 	if (tcpu->flags & TC_NONBLOCK)
345 		tcpu->splice_read_flags |= SPLICE_F_NONBLOCK;
346 
347 	return 0;
348 }
349 
350 /**
351  * tracefs_cpu_buffered_read - Read the raw trace data buffering through a pipe
352  * @tcpu: The descriptor representing the raw trace file
353  * @buffer: Where to read into (must be at least the size of the subbuffer)
354  * @nonblock: Hint to not block on the read if there's no data.
355  *
356  * This is basically the same as tracefs_cpu_read() except that it uses
357  * a pipe through splice to buffer reads. This will batch reads keeping
358  * the reading from the ring buffer less intrusive to the system, as
359  * just reading all the time can cause quite a disturbance.
360  *
361  * Note, one difference between this and tracefs_cpu_read() is that it
362  * will read only in sub buffer pages. If the ring buffer has not filled
363  * a page, then it will not return anything, even with @nonblock set.
364  * Calls to tracefs_cpu_flush() should be done to read the rest of
365  * the file at the end of the trace.
366  *
367  * Returns the amount read or -1 on error.
368  */
tracefs_cpu_buffered_read(struct tracefs_cpu * tcpu,void * buffer,bool nonblock)369 int tracefs_cpu_buffered_read(struct tracefs_cpu *tcpu, void *buffer, bool nonblock)
370 {
371 	int mode = SPLICE_F_MOVE;
372 	int ret;
373 
374 	if (tcpu->buffered < 0)
375 		tcpu->buffered = 0;
376 
377 	if (tcpu->buffered)
378 		goto do_read;
379 
380 	ret = wait_on_input(tcpu, nonblock);
381 	if (ret <= 0)
382 		return ret;
383 
384 	if (tcpu->flags & TC_NONBLOCK)
385 		mode |= SPLICE_F_NONBLOCK;
386 
387 	ret = init_splice(tcpu);
388 	if (ret < 0)
389 		return ret;
390 
391 	ret = splice(tcpu->fd, NULL, tcpu->splice_pipe[1], NULL,
392 		     tcpu->pipe_size, mode);
393 	if (ret <= 0)
394 		return ret;
395 
396 	tcpu->buffered = ret;
397 
398  do_read:
399 	ret = read(tcpu->splice_pipe[0], buffer, tcpu->subbuf_size);
400 	if (ret > 0)
401 		tcpu->buffered -= ret;
402 	return ret;
403 }
404 
405 /**
406  * tracefs_cpu_stop - Stop a blocked read of the raw tracing file
407  * @tcpu: The descriptor representing the raw trace file
408  *
409  * This will attempt to unblock a task blocked on @tcpu reading it.
410  * On older kernels, it may not do anything for the pipe reads, as
411  * older kernels do not wake up tasks waiting on the ring buffer.
412  *
413  * Returns 0 if the tasks reading the raw tracing file does not
414  * need a nudge.
415  *
416  * Returns 1 if that tasks may need a nudge (send a signal).
417  *
418  * Returns negative on error.
419  */
tracefs_cpu_stop(struct tracefs_cpu * tcpu)420 int tracefs_cpu_stop(struct tracefs_cpu *tcpu)
421 {
422 	int ret = 1;
423 
424 	if (tcpu->flags & TC_PERM_NONBLOCK)
425 		return 0;
426 
427 	ret = write(tcpu->ctrl_pipe[1], &ret, 1);
428 	if (ret < 0)
429 		return ret;
430 
431 	/* Calling ioctl() on recent kernels will wake up the waiters */
432 	ret = ioctl(tcpu->fd, 0);
433 	if (ret < 0)
434 		ret = 1;
435 	else
436 		ret = 0;
437 
438 	set_nonblock(tcpu);
439 
440 	return ret;
441 }
442 
443 /**
444  * tracefs_cpu_flush - Finish out and read the rest of the raw tracing file
445  * @tcpu: The descriptor representing the raw trace file
446  * @buffer: Where to read into (must be at least the size of the subbuffer)
447  *
448  * Reads the trace_pipe_raw file associated by the @tcpu and puts it
449  * into @buffer, which must be the size of the sub buffer which is retrieved.
450  * by tracefs_cpu_read_size(). This should be called at the end of tracing
451  * to get the rest of the data.
452  *
453  * This will set the file descriptor for reading to non-blocking mode.
454  *
455  * Returns the number of bytes read, or negative on error.
456  */
tracefs_cpu_flush(struct tracefs_cpu * tcpu,void * buffer)457 int tracefs_cpu_flush(struct tracefs_cpu *tcpu, void *buffer)
458 {
459 	int ret;
460 
461 	/* Make sure that reading is now non blocking */
462 	set_nonblock(tcpu);
463 
464 	if (tcpu->buffered < 0)
465 		tcpu->buffered = 0;
466 
467 	if (tcpu->buffered) {
468 		ret = read(tcpu->splice_pipe[0], buffer, tcpu->subbuf_size);
469 		if (ret > 0)
470 			tcpu->buffered -= ret;
471 		return ret;
472 	}
473 
474 	ret = read(tcpu->fd, buffer, tcpu->subbuf_size);
475 	if (ret > 0 && tcpu->buffered)
476 		tcpu->buffered -= ret;
477 
478 	/* It's OK if there's no data to read */
479 	if (ret < 0 && errno == EAGAIN) {
480 		/* Reset errno */
481 		errno = 0;
482 		ret = 0;
483 	}
484 
485 	return ret;
486 }
487 
488 /**
489  * tracefs_cpu_flush_write - Finish out and read the rest of the raw tracing file
490  * @tcpu: The descriptor representing the raw trace file
491  * @wfd: The write file descriptor to write the data to
492  *
493  * Reads the trace_pipe_raw file associated by the @tcpu and writes it to
494  * @wfd. This should be called at the end of tracing to get the rest of the data.
495  *
496  * Returns the number of bytes written, or negative on error.
497  */
tracefs_cpu_flush_write(struct tracefs_cpu * tcpu,int wfd)498 int tracefs_cpu_flush_write(struct tracefs_cpu *tcpu, int wfd)
499 {
500 	char buffer[tcpu->subbuf_size];
501 	int ret;
502 
503 	ret = tracefs_cpu_flush(tcpu, buffer);
504 	if (ret > 0)
505 		ret = write(wfd, buffer, ret);
506 
507 	/* It's OK if there's no data to read */
508 	if (ret < 0 && errno == EAGAIN)
509 		ret = 0;
510 
511 	return ret;
512 }
513 
514 /**
515  * tracefs_cpu_write - Write the raw trace file into a file descriptor
516  * @tcpu: The descriptor representing the raw trace file
517  * @wfd: The write file descriptor to write the data to
518  * @nonblock: Hint to not block on the read if there's no data.
519  *
520  * This will pipe the data from the trace_pipe_raw file associated with @tcpu
521  * into the @wfd file descriptor. If @nonblock is set, then it will not
522  * block on if there's nothing to write. Note, it will only write sub buffer
523  * size data to @wfd. Calls to tracefs_cpu_flush_write() are needed to
524  * write out the rest.
525  *
526  * Returns the number of bytes read or negative on error.
527  */
tracefs_cpu_write(struct tracefs_cpu * tcpu,int wfd,bool nonblock)528 int tracefs_cpu_write(struct tracefs_cpu *tcpu, int wfd, bool nonblock)
529 {
530 	char buffer[tcpu->subbuf_size];
531 	int mode = SPLICE_F_MOVE;
532 	int tot_write = 0;
533 	int tot;
534 	int ret;
535 
536 	ret = wait_on_input(tcpu, nonblock);
537 	if (ret <= 0)
538 		return ret;
539 
540 	if (tcpu->flags & TC_NONBLOCK)
541 		mode |= SPLICE_F_NONBLOCK;
542 
543 	ret = init_splice(tcpu);
544 	if (ret < 0)
545 		return ret;
546 
547 	tot = splice(tcpu->fd, NULL, tcpu->splice_pipe[1], NULL,
548 		     tcpu->pipe_size, mode);
549 	if (tot < 0)
550 		return tot;
551 
552 	if (tot == 0)
553 		return 0;
554 
555 	ret = splice(tcpu->splice_pipe[0], NULL, wfd, NULL,
556 		     tot, SPLICE_F_MOVE | SPLICE_F_NONBLOCK);
557 
558 	if (ret >= 0)
559 		return ret;
560 
561 	/* Some file systems do not allow splicing, try writing instead */
562 	do {
563 		int r = tcpu->subbuf_size;
564 
565 		if (r > tot)
566 			r = tot;
567 
568 		ret = read(tcpu->splice_pipe[0], buffer, r);
569 		if (ret > 0) {
570 			tot -= ret;
571 			ret = write(wfd, buffer, ret);
572 		}
573 		if (ret > 0)
574 			tot_write += ret;
575 	} while (ret > 0);
576 
577 	if (ret < 0)
578 		return ret;
579 
580 	return tot_write;
581 }
582 
583 /**
584  * tracefs_cpu_pipe - Write the raw trace file into a pipe descriptor
585  * @tcpu: The descriptor representing the raw trace file
586  * @wfd: The write file descriptor to write the data to (must be a pipe)
587  * @nonblock: Hint to not block on the read if there's no data.
588  *
589  * This will splice directly the file descriptor of the trace_pipe_raw
590  * file to the given @wfd, which must be a pipe. This can also be used
591  * if @tcpu was created with tracefs_cpu_create_fd() where the passed
592  * in @fd there was a pipe, then @wfd does not need to be a pipe.
593  *
594  * Returns the number of bytes read or negative on error.
595  */
tracefs_cpu_pipe(struct tracefs_cpu * tcpu,int wfd,bool nonblock)596 int tracefs_cpu_pipe(struct tracefs_cpu *tcpu, int wfd, bool nonblock)
597 {
598 	int mode = SPLICE_F_MOVE;
599 	int ret;
600 
601 	ret = wait_on_input(tcpu, nonblock);
602 	if (ret <= 0)
603 		return ret;
604 
605 	if (tcpu->flags & TC_NONBLOCK)
606 		mode |= SPLICE_F_NONBLOCK;
607 
608 	ret = splice(tcpu->fd, NULL, wfd, NULL,
609 		     tcpu->pipe_size, mode);
610 	return ret;
611 }
612