• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: LGPL-2.1
2 /*
3  * Copyright (C) 2022 Google Inc, Steven Rostedt <rostedt@goodmis.org>
4  */
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <dirent.h>
8 #include <unistd.h>
9 #include <fcntl.h>
10 #include <limits.h>
11 #include <errno.h>
12 #include <sys/stat.h>
13 #include <sys/ioctl.h>
14 #include <sys/select.h>
15 
16 #include <kbuffer.h>
17 
18 #include "tracefs.h"
19 #include "tracefs-local.h"
20 
/* Internal state flags kept in tracefs_cpu->flags */
enum {
	TC_STOP			= 1 << 0,   /* Stop reading */
	TC_PERM_NONBLOCK	= 1 << 1,   /* read is always non blocking */
	TC_NONBLOCK		= 1 << 2,   /* read is non blocking */
};
26 
/* Per-CPU handle for reading a trace_pipe_raw (or snapshot_raw) file */
struct tracefs_cpu {
	int		fd;		/* descriptor of the raw trace file */
	int		flags;		/* TC_* state flags */
	int		nfds;		/* select() bound: highest watched fd + 1 */
	int		ctrl_pipe[2];	/* wakes up blocked reads (tracefs_cpu_stop()) */
	int		splice_pipe[2];	/* pipe used by the splice based readers */
	int		pipe_size;	/* capacity of splice_pipe */
	int		subbuf_size;	/* size of one ring buffer sub-buffer */
	int		buffered;	/* bytes still queued in splice_pipe */
	int		splice_read_flags; /* SPLICE_F_* flags set by init_splice() */
	struct kbuffer	*kbuf;		/* parser handle returned by *_read_buf() */
	void		*buffer;	/* lazily allocated sub-buffer scratch (get_buffer()) */
	void		*mapping;	/* memory mapped ring buffer, NULL when unmapped */
};
41 
42 /**
43  * tracefs_cpu_alloc_fd - create a tracefs_cpu instance for an existing fd
44  * @fd: The file descriptor to attach the tracefs_cpu to
45  * @subbuf_size: The expected size to read the subbuffer with
46  * @nonblock: If true, the file will be opened in O_NONBLOCK mode
47  *
48  * Return a descriptor that can read the tracefs trace_pipe_raw file
49  * that is associated with the given @fd and must be read in @subbuf_size.
50  *
51  * Returns NULL on error.
52  */
53 struct tracefs_cpu *
tracefs_cpu_alloc_fd(int fd,int subbuf_size,bool nonblock)54 tracefs_cpu_alloc_fd(int fd, int subbuf_size, bool nonblock)
55 {
56 	struct tracefs_cpu *tcpu;
57 	int mode = O_RDONLY;
58 	int ret;
59 
60 	tcpu = calloc(1, sizeof(*tcpu));
61 	if (!tcpu)
62 		return NULL;
63 
64 	if (nonblock) {
65 		mode |= O_NONBLOCK;
66 		tcpu->flags |= TC_NONBLOCK | TC_PERM_NONBLOCK;
67 	}
68 
69 	tcpu->splice_pipe[0] = -1;
70 	tcpu->splice_pipe[1] = -1;
71 
72 	tcpu->fd = fd;
73 
74 	tcpu->subbuf_size = subbuf_size;
75 
76 	if (tcpu->flags & TC_PERM_NONBLOCK) {
77 		tcpu->ctrl_pipe[0] = -1;
78 		tcpu->ctrl_pipe[1] = -1;
79 	} else {
80 		/* ctrl_pipe is used to break out of blocked reads */
81 		ret = pipe(tcpu->ctrl_pipe);
82 		if (ret < 0)
83 			goto fail;
84 		if (tcpu->ctrl_pipe[0] > tcpu->fd)
85 			tcpu->nfds = tcpu->ctrl_pipe[0] + 1;
86 		else
87 			tcpu->nfds = tcpu->fd + 1;
88 	}
89 
90 	return tcpu;
91  fail:
92 	free(tcpu);
93 	return NULL;
94 }
95 
cpu_open(struct tracefs_instance * instance,const char * path_fmt,int cpu,bool nonblock)96 static struct tracefs_cpu *cpu_open(struct tracefs_instance *instance,
97 				    const char *path_fmt, int cpu, bool nonblock)
98 {
99 	struct tracefs_cpu *tcpu;
100 	struct tep_handle *tep;
101 	struct kbuffer *kbuf;
102 	char path[128];
103 	int mode = O_RDONLY;
104 	int subbuf_size;
105 	int ret;
106 	int fd;
107 
108 	if (nonblock)
109 		mode |= O_NONBLOCK;
110 
111 	sprintf(path, path_fmt, cpu);
112 
113 	fd = tracefs_instance_file_open(instance, path, mode);
114 	if (fd < 0)
115 		return NULL;
116 
117 	tep = tep_alloc();
118 	if (!tep)
119 		goto fail;
120 
121 	/* Get the size of the page */
122 	ret = tracefs_load_headers(NULL, tep);
123 	if (ret < 0)
124 		goto fail;
125 
126 	subbuf_size = tep_get_sub_buffer_size(tep);
127 
128 	kbuf = tep_kbuffer(tep);
129 	if (!kbuf)
130 		goto fail;
131 
132 	tep_free(tep);
133 	tep = NULL;
134 
135 	tcpu = tracefs_cpu_alloc_fd(fd, subbuf_size, nonblock);
136 	if (!tcpu)
137 		goto fail;
138 
139 	tcpu->kbuf = kbuf;
140 
141 	return tcpu;
142  fail:
143 	tep_free(tep);
144 	close(fd);
145 	return NULL;
146 }
147 
/**
 * tracefs_cpu_open - open an instance raw trace file
 * @instance: the instance (NULL for toplevel) of the cpu raw file to open
 * @cpu: The CPU that the raw trace file is associated with
 * @nonblock: If true, the file will be opened in O_NONBLOCK mode
 *
 * Return a descriptor that can read the tracefs trace_pipe_raw file
 * for a given @cpu in a given @instance.
 *
 * Returns NULL on error.
 */
struct tracefs_cpu *
tracefs_cpu_open(struct tracefs_instance *instance, int cpu, bool nonblock)
{
	return cpu_open(instance, "per_cpu/cpu%d/trace_pipe_raw", cpu, nonblock);
}
164 
/**
 * tracefs_cpu_snapshot_open - open an instance snapshot raw trace file
 * @instance: the instance (NULL for toplevel) of the cpu raw file to open
 * @cpu: The CPU that the raw trace file is associated with
 * @nonblock: If true, the file will be opened in O_NONBLOCK mode
 *
 * Return a descriptor that can read the tracefs snapshot_raw file
 * for a given @cpu in a given @instance.
 *
 * If @nonblock is not set, reads will block while the snapshot is
 * empty and wake up when there's a new snapshot.
 *
 * Returns NULL on error.
 */
struct tracefs_cpu *
tracefs_cpu_snapshot_open(struct tracefs_instance *instance, int cpu, bool nonblock)
{
	return cpu_open(instance, "per_cpu/cpu%d/snapshot_raw", cpu, nonblock);
}
184 
/**
 * tracefs_snapshot_snap - takes a snapshot (allocates if necessary)
 * @instance: The instance to take a snapshot on
 *
 * Takes a snapshot of the current ring buffer.
 *
 * Returns 0 on success, -1 on error.
 */
int tracefs_snapshot_snap(struct tracefs_instance *instance)
{
	/* Writing "1" into the snapshot file triggers a snapshot */
	if (tracefs_instance_file_write(instance, "snapshot", "1") < 0)
		return -1;

	return 0;
}
200 
/**
 * tracefs_snapshot_clear - clears the snapshot
 * @instance: The instance to clear the snapshot
 *
 * Clears the snapshot buffer for the @instance.
 *
 * Returns 0 on success, -1 on error.
 */
int tracefs_snapshot_clear(struct tracefs_instance *instance)
{
	/* Writing "2" into the snapshot file clears the snapshot buffer */
	if (tracefs_instance_file_write(instance, "snapshot", "2") < 0)
		return -1;

	return 0;
}
216 
/**
 * tracefs_snapshot_free - frees the snapshot
 * @instance: The instance to free the snapshot
 *
 * Frees the snapshot for the given @instance.
 *
 * Returns 0 on success, -1 on error.
 */
int tracefs_snapshot_free(struct tracefs_instance *instance)
{
	/* Writing "0" into the snapshot file frees the snapshot buffer */
	if (tracefs_instance_file_write(instance, "snapshot", "0") < 0)
		return -1;

	return 0;
}
232 
233 /**
234  * tracefs_cpu_open_mapped - open an instance raw trace file and map it
235  * @instance: the instance (NULL for toplevel) of the cpu raw file to open
236  * @cpu: The CPU that the raw trace file is associated with
237  * @nonblock: If true, the file will be opened in O_NONBLOCK mode
238  *
239  * Return a descriptor that can read the tracefs trace_pipe_raw file
240  * for a give @cpu in a given @instance.
241  *
242  * Returns NULL on error.
243  */
244 struct tracefs_cpu *
tracefs_cpu_open_mapped(struct tracefs_instance * instance,int cpu,bool nonblock)245 tracefs_cpu_open_mapped(struct tracefs_instance *instance, int cpu, bool nonblock)
246 {
247 	struct tracefs_cpu *tcpu;
248 
249 	tcpu = tracefs_cpu_open(instance, cpu, nonblock);
250 	if (!tcpu)
251 		return NULL;
252 
253 	tracefs_cpu_map(tcpu);
254 
255 	return tcpu;
256 }
257 
/* Close @fd unless it is the -1 "never opened" marker */
static void close_fd(int fd)
{
	if (fd >= 0)
		close(fd);
}
264 
265 /**
266  * tracefs_cpu_free_fd - clean up the tracefs_cpu descriptor
267  * @tcpu: The descriptor created with tracefs_cpu_alloc_fd()
268  *
269  * Closes all the internal file descriptors that were opened by
270  * tracefs_cpu_alloc_fd(), and frees the descriptor.
271  */
tracefs_cpu_free_fd(struct tracefs_cpu * tcpu)272 void tracefs_cpu_free_fd(struct tracefs_cpu *tcpu)
273 {
274 	close_fd(tcpu->ctrl_pipe[0]);
275 	close_fd(tcpu->ctrl_pipe[1]);
276 	close_fd(tcpu->splice_pipe[0]);
277 	close_fd(tcpu->splice_pipe[1]);
278 
279 	trace_unmap(tcpu->mapping);
280 	kbuffer_free(tcpu->kbuf);
281 	free(tcpu);
282 }
283 
284 /**
285  * tracefs_cpu_close - clean up and close a raw trace descriptor
286  * @tcpu: The descriptor created with tracefs_cpu_open()
287  *
288  * Closes all the file descriptors associated to the trace_pipe_raw
289  * opened by tracefs_cpu_open().
290  */
tracefs_cpu_close(struct tracefs_cpu * tcpu)291 void tracefs_cpu_close(struct tracefs_cpu *tcpu)
292 {
293 	if (!tcpu)
294 		return;
295 
296 	close(tcpu->fd);
297 	tracefs_cpu_free_fd(tcpu);
298 }
299 
300 /**
301  * tracefs_cpu_read_size - Return the size of the sub buffer
302  * @tcpu: The descriptor that holds the size of the sub buffer
303  *
304  * A lot of the functions that read the data from the trace_pipe_raw
305  * expect the caller to have allocated enough space to store a full
306  * subbuffer. Calling this function is a requirement to do so.
307  */
tracefs_cpu_read_size(struct tracefs_cpu * tcpu)308 int tracefs_cpu_read_size(struct tracefs_cpu *tcpu)
309 {
310 	if (!tcpu)
311 		return -1;
312 	return tcpu->subbuf_size;
313 }
314 
tracefs_cpu_is_mapped(struct tracefs_cpu * tcpu)315 bool tracefs_cpu_is_mapped(struct tracefs_cpu *tcpu)
316 {
317 	return tcpu->mapping != NULL;
318 }
319 
/**
 * tracefs_mapped_is_supported - find out if memory mapping is supported
 *
 * Return true if the ring buffer can be memory mapped, or false on
 * error or it cannot be.
 */
bool tracefs_mapped_is_supported(void)
{
	struct tracefs_cpu *tcpu;
	bool mapped;

	/* Probe by trying to map CPU 0 of the top level instance */
	tcpu = tracefs_cpu_open_mapped(NULL, 0, false);
	if (!tcpu)
		return false;

	mapped = tracefs_cpu_is_mapped(tcpu);
	tracefs_cpu_close(tcpu);

	return mapped;
}
338 
/**
 * tracefs_cpu_map - memory map the ring buffer of a tracefs_cpu
 * @tcpu: The descriptor to map
 *
 * Returns 0 on success (or if already mapped), -1 on failure.
 */
int tracefs_cpu_map(struct tracefs_cpu *tcpu)
{
	/* Already mapped, nothing to do */
	if (tcpu->mapping)
		return 0;

	tcpu->mapping = trace_mmap(tcpu->fd, tcpu->kbuf);
	return tcpu->mapping ? 0 : -1;
}
347 
tracefs_cpu_unmap(struct tracefs_cpu * tcpu)348 void tracefs_cpu_unmap(struct tracefs_cpu *tcpu)
349 {
350 	if (!tcpu->mapping)
351 		return;
352 
353 	trace_unmap(tcpu->mapping);
354 }
355 
set_nonblock(struct tracefs_cpu * tcpu)356 static void set_nonblock(struct tracefs_cpu *tcpu)
357 {
358 	long flags;
359 
360 	if (tcpu->flags & TC_NONBLOCK)
361 		return;
362 
363 	flags = fcntl(tcpu->fd, F_GETFL);
364 	fcntl(tcpu->fd, F_SETFL, flags | O_NONBLOCK);
365 	tcpu->flags |= TC_NONBLOCK;
366 }
367 
unset_nonblock(struct tracefs_cpu * tcpu)368 static void unset_nonblock(struct tracefs_cpu *tcpu)
369 {
370 	long flags;
371 
372 	if (!(tcpu->flags & TC_NONBLOCK))
373 		return;
374 
375 	flags = fcntl(tcpu->fd, F_GETFL);
376 	flags &= ~O_NONBLOCK;
377 	fcntl(tcpu->fd, F_SETFL, flags);
378 	tcpu->flags &= ~TC_NONBLOCK;
379 }
380 
/*
 * If set to blocking mode, block until the watermark has been
 * reached, or the control has said to stop. If the control is
 * set, then nonblock will be set to true on the way out.
 *
 * Returns 1 if there is (or may be) data to read, 0 if there is
 * none, and negative if select() failed.
 */
static int wait_on_input(struct tracefs_cpu *tcpu, bool nonblock)
{
	fd_set rfds;
	int ret;

	/* Permanently non blocking descriptors never wait */
	if (tcpu->flags & TC_PERM_NONBLOCK)
		return 1;

	if (nonblock) {
		set_nonblock(tcpu);
		return 1;
	} else {
		unset_nonblock(tcpu);
	}

	/* Watch both the data fd and the control pipe */
	FD_ZERO(&rfds);
	FD_SET(tcpu->fd, &rfds);
	FD_SET(tcpu->ctrl_pipe[0], &rfds);

	ret = select(tcpu->nfds, &rfds, NULL, NULL, NULL);

	/* Let the application decide what to do with signals and such */
	if (ret < 0)
		return ret;

	if (FD_ISSET(tcpu->ctrl_pipe[0], &rfds)) {
		/* Flush the ctrl pipe (tracefs_cpu_stop() wrote one byte) */
		read(tcpu->ctrl_pipe[0], &ret, 1);

		/* Make nonblock as it is now stopped */
		set_nonblock(tcpu);
		/* Permanently set unblock */
		tcpu->flags |= TC_PERM_NONBLOCK;
	}

	return FD_ISSET(tcpu->fd, &rfds);
}
423 
424 /* If nonblock is set, set errno to EAGAIN on no data */
mmap_read(struct tracefs_cpu * tcpu,void * buffer,bool nonblock)425 static int mmap_read(struct tracefs_cpu *tcpu, void *buffer, bool nonblock)
426 {
427 	void *mapping = tcpu->mapping;
428 	int ret;
429 
430 	ret = trace_mmap_read(mapping, buffer);
431 	if (ret <= 0) {
432 		if (!ret && nonblock)
433 			errno = EAGAIN;
434 		return ret;
435 	}
436 
437 	/* Write full sub-buffer size, but zero out empty space */
438 	if (ret < tcpu->subbuf_size)
439 		memset(buffer + ret, 0, tcpu->subbuf_size - ret);
440 	return tcpu->subbuf_size;
441 }
442 
443 /**
444  * tracefs_cpu_read - read from the raw trace file
445  * @tcpu: The descriptor representing the raw trace file
446  * @buffer: Where to read into (must be at least the size of the subbuffer)
447  * @nonblock: Hint to not block on the read if there's no data.
448  *
449  * Reads the trace_pipe_raw files associated to @tcpu into @buffer.
450  * @buffer must be at least the size of the sub buffer of the ring buffer,
451  * which is returned by tracefs_cpu_read_size().
452  *
453  * If @nonblock is set, and there's no data available, it will return
454  * immediately. Otherwise depending on how @tcpu was opened, it will
455  * block. If @tcpu was opened with nonblock set, then this @nonblock
456  * will make no difference.
457  *
458  * Returns the amount read or -1 on error.
459  */
tracefs_cpu_read(struct tracefs_cpu * tcpu,void * buffer,bool nonblock)460 int tracefs_cpu_read(struct tracefs_cpu *tcpu, void *buffer, bool nonblock)
461 {
462 	int ret;
463 
464 	/*
465 	 * If nonblock is set, then the wait_on_input() will return
466 	 * immediately, if there's nothing in the buffer, with
467 	 * ret == 0.
468 	 */
469 	ret = wait_on_input(tcpu, nonblock);
470 	if (ret <= 0)
471 		return ret;
472 
473 	if (tcpu->mapping)
474 		return mmap_read(tcpu, buffer, nonblock);
475 
476 	ret = read(tcpu->fd, buffer, tcpu->subbuf_size);
477 
478 	/* It's OK if there's no data to read */
479 	if (ret < 0 && errno == EAGAIN) {
480 		/* Reset errno */
481 		errno = 0;
482 		ret = 0;
483 	}
484 
485 	return ret;
486 }
487 
get_buffer(struct tracefs_cpu * tcpu)488 static bool get_buffer(struct tracefs_cpu *tcpu)
489 {
490 	if (!tcpu->buffer) {
491 		tcpu->buffer = malloc(tcpu->subbuf_size);
492 		if (!tcpu->buffer)
493 			return false;
494 	}
495 	return true;
496 }
497 
498 /**
499  * tracefs_cpu_read_buf - read from the raw trace file and return kbuffer
500  * @tcpu: The descriptor representing the raw trace file
501  * @nonblock: Hint to not block on the read if there's no data.
502  *
503  * Reads the trace_pipe_raw files associated to @tcpu and returns a kbuffer
504  * associated with the read that can be used to parse events.
505  *
506  * If @nonblock is set, and there's no data available, it will return
507  * immediately. Otherwise depending on how @tcpu was opened, it will
508  * block. If @tcpu was opened with nonblock set, then this @nonblock
509  * will make no difference.
510  *
511  * Returns a kbuffer associated to the next sub-buffer or NULL on error
512  * or no data to read with nonblock set (EAGAIN will be set).
513  *
514  * The kbuffer returned should not be freed!
515  */
tracefs_cpu_read_buf(struct tracefs_cpu * tcpu,bool nonblock)516 struct kbuffer *tracefs_cpu_read_buf(struct tracefs_cpu *tcpu, bool nonblock)
517 {
518 	int ret;
519 
520 	/* If mapping is enabled, just use it directly */
521 	if (tcpu->mapping) {
522 		ret = wait_on_input(tcpu, nonblock);
523 		if (ret <= 0)
524 			return NULL;
525 
526 		ret = trace_mmap_load_subbuf(tcpu->mapping, tcpu->kbuf);
527 		return ret > 0 ? tcpu->kbuf : NULL;
528 	}
529 
530 	if (!get_buffer(tcpu))
531 		return NULL;
532 
533 	ret = tracefs_cpu_read(tcpu, tcpu->buffer, nonblock);
534 	if (ret <= 0)
535 		return NULL;
536 
537 	kbuffer_load_subbuffer(tcpu->kbuf, tcpu->buffer);
538 	return tcpu->kbuf;
539 }
540 
init_splice(struct tracefs_cpu * tcpu)541 static int init_splice(struct tracefs_cpu *tcpu)
542 {
543 	char *buf;
544 	int ret;
545 
546 	if (tcpu->splice_pipe[0] >= 0)
547 		return 0;
548 
549 	ret = pipe(tcpu->splice_pipe);
550 	if (ret < 0)
551 		return ret;
552 
553 	if (str_read_file("/proc/sys/fs/pipe-max-size", &buf, false)) {
554 		int size = atoi(buf);
555 		fcntl(tcpu->splice_pipe[0], F_SETPIPE_SZ, &size);
556 		free(buf);
557 	}
558 
559 	ret = fcntl(tcpu->splice_pipe[0], F_GETPIPE_SZ, &tcpu->pipe_size);
560 	/*
561 	 * F_GETPIPE_SZ was introduced in 2.6.35, ftrace was introduced
562 	 * in 2.6.31. If we are running on an older kernel, just fall
563 	 * back to using subbuf_size for splice(). It could also return
564 	 * the size of the pipe and not set pipe_size.
565 	 */
566 	if (ret > 0 && !tcpu->pipe_size)
567 		tcpu->pipe_size = ret;
568 	else if (ret < 0)
569 		tcpu->pipe_size = tcpu->subbuf_size;
570 
571 	tcpu->splice_read_flags = SPLICE_F_MOVE;
572 	if (tcpu->flags & TC_NONBLOCK)
573 		tcpu->splice_read_flags |= SPLICE_F_NONBLOCK;
574 
575 	return 0;
576 }
577 
/**
 * tracefs_cpu_buffered_read - Read the raw trace data buffering through a pipe
 * @tcpu: The descriptor representing the raw trace file
 * @buffer: Where to read into (must be at least the size of the subbuffer)
 * @nonblock: Hint to not block on the read if there's no data.
 *
 * This is basically the same as tracefs_cpu_read() except that it uses
 * a pipe through splice to buffer reads. This will batch reads keeping
 * the reading from the ring buffer less intrusive to the system, as
 * just reading all the time can cause quite a disturbance.
 *
 * Note, one difference between this and tracefs_cpu_read() is that it
 * will read only in sub buffer pages. If the ring buffer has not filled
 * a page, then it will not return anything, even with @nonblock set.
 * Calls to tracefs_cpu_flush() should be done to read the rest of
 * the file at the end of the trace.
 *
 * Returns the amount read or -1 on error.
 */
int tracefs_cpu_buffered_read(struct tracefs_cpu *tcpu, void *buffer, bool nonblock)
{
	int mode = SPLICE_F_MOVE;
	int ret;

	/* buffered counts the bytes still queued in the splice pipe */
	if (tcpu->buffered < 0)
		tcpu->buffered = 0;

	/* Drain the pipe before splicing more out of the ring buffer */
	if (tcpu->buffered)
		goto do_read;

	ret = wait_on_input(tcpu, nonblock);
	if (ret <= 0)
		return ret;

	/* The memory mapped path does not use the splice pipe */
	if (tcpu->mapping)
		return mmap_read(tcpu, buffer, nonblock);

	if (tcpu->flags & TC_NONBLOCK)
		mode |= SPLICE_F_NONBLOCK;

	ret = init_splice(tcpu);
	if (ret < 0)
		return ret;

	/* Batch: move up to a full pipe of data out of the ring buffer */
	ret = splice(tcpu->fd, NULL, tcpu->splice_pipe[1], NULL,
		     tcpu->pipe_size, mode);
	if (ret <= 0)
		return ret;

	tcpu->buffered = ret;

 do_read:
	/* Hand the buffered data out one sub buffer at a time */
	ret = read(tcpu->splice_pipe[0], buffer, tcpu->subbuf_size);
	if (ret > 0)
		tcpu->buffered -= ret;
	return ret;
}
635 
636 /**
637  * tracefs_cpu_buffered_read_buf - Read the raw trace data buffering through a pipe
638  * @tcpu: The descriptor representing the raw trace file
639  * @nonblock: Hint to not block on the read if there's no data.
640  *
641  * This is basically the same as tracefs_cpu_read() except that it uses
642  * a pipe through splice to buffer reads. This will batch reads keeping
643  * the reading from the ring buffer less intrusive to the system, as
644  * just reading all the time can cause quite a disturbance.
645  *
646  * Note, one difference between this and tracefs_cpu_read() is that it
647  * will read only in sub buffer pages. If the ring buffer has not filled
648  * a page, then it will not return anything, even with @nonblock set.
649  * Calls to tracefs_cpu_flush() should be done to read the rest of
650  * the file at the end of the trace.
651  *
652  * Returns a kbuffer associated to the next sub-buffer or NULL on error
653  * or no data to read with nonblock set (EAGAIN will be set).
654  *
655  * The kbuffer returned should not be freed!
656  */
tracefs_cpu_buffered_read_buf(struct tracefs_cpu * tcpu,bool nonblock)657 struct kbuffer *tracefs_cpu_buffered_read_buf(struct tracefs_cpu *tcpu, bool nonblock)
658 {
659 	int ret;
660 
661 	/* If mapping is enabled, just use it directly */
662 	if (tcpu->mapping) {
663 		ret = wait_on_input(tcpu, nonblock);
664 		if (ret <= 0)
665 			return NULL;
666 
667 		ret = trace_mmap_load_subbuf(tcpu->mapping, tcpu->kbuf);
668 		return ret > 0 ? tcpu->kbuf : NULL;
669 	}
670 
671 	if (!get_buffer(tcpu))
672 		return NULL;
673 
674 	ret = tracefs_cpu_buffered_read(tcpu, tcpu->buffer, nonblock);
675 	if (ret <= 0)
676 		return NULL;
677 
678 	kbuffer_load_subbuffer(tcpu->kbuf, tcpu->buffer);
679 	return tcpu->kbuf;
680 }
681 
/**
 * tracefs_cpu_stop - Stop a blocked read of the raw tracing file
 * @tcpu: The descriptor representing the raw trace file
 *
 * This will attempt to unblock a task blocked on @tcpu reading it.
 * On older kernels, it may not do anything for the pipe reads, as
 * older kernels do not wake up tasks waiting on the ring buffer.
 *
 * Returns 0 if the tasks reading the raw tracing file does not
 * need a nudge.
 *
 * Returns 1 if that tasks may need a nudge (send a signal).
 *
 * Returns negative on error.
 */
int tracefs_cpu_stop(struct tracefs_cpu *tcpu)
{
	int ret = 1;

	/* Already permanently non blocking: no reader can be stuck */
	if (tcpu->flags & TC_PERM_NONBLOCK)
		return 0;

	/* Wake wait_on_input() by writing a byte into the control pipe */
	ret = write(tcpu->ctrl_pipe[1], &ret, 1);
	if (ret < 0)
		return ret;

	/* Calling ioctl() on recent kernels will wake up the waiters */
	ret = ioctl(tcpu->fd, 0);
	if (ret < 0)
		ret = 1;	/* old kernel: the reader may need a signal */
	else
		ret = 0;

	/* Make sure any further reads do not block */
	set_nonblock(tcpu);

	return ret;
}
719 
720 /**
721  * tracefs_cpu_flush - Finish out and read the rest of the raw tracing file
722  * @tcpu: The descriptor representing the raw trace file
723  * @buffer: Where to read into (must be at least the size of the subbuffer)
724  *
725  * Reads the trace_pipe_raw file associated by the @tcpu and puts it
726  * into @buffer, which must be the size of the sub buffer which is retrieved.
727  * by tracefs_cpu_read_size(). This should be called at the end of tracing
728  * to get the rest of the data.
729  *
730  * This will set the file descriptor for reading to non-blocking mode.
731  *
732  * Returns the number of bytes read, or negative on error.
733  */
tracefs_cpu_flush(struct tracefs_cpu * tcpu,void * buffer)734 int tracefs_cpu_flush(struct tracefs_cpu *tcpu, void *buffer)
735 {
736 	int ret;
737 
738 	/* Make sure that reading is now non blocking */
739 	set_nonblock(tcpu);
740 
741 	if (tcpu->buffered < 0)
742 		tcpu->buffered = 0;
743 
744 	if (tcpu->mapping)
745 		return mmap_read(tcpu, buffer, false);
746 
747 	if (tcpu->buffered) {
748 		ret = read(tcpu->splice_pipe[0], buffer, tcpu->subbuf_size);
749 		if (ret > 0)
750 			tcpu->buffered -= ret;
751 		return ret;
752 	}
753 
754 	ret = read(tcpu->fd, buffer, tcpu->subbuf_size);
755 	if (ret > 0 && tcpu->buffered)
756 		tcpu->buffered -= ret;
757 
758 	/* It's OK if there's no data to read */
759 	if (ret < 0 && errno == EAGAIN) {
760 		/* Reset errno */
761 		errno = 0;
762 		ret = 0;
763 	}
764 
765 	return ret;
766 }
767 
768 /**
769  * tracefs_cpu_flush_buf - Finish out and read the rest of the raw tracing file
770  * @tcpu: The descriptor representing the raw trace file
771  *
772  * Reads the trace_pipe_raw file associated by the @tcpu and puts it
773  * into @buffer, which must be the size of the sub buffer which is retrieved.
774  * by tracefs_cpu_read_size(). This should be called at the end of tracing
775  * to get the rest of the data.
776  *
777  * This will set the file descriptor for reading to non-blocking mode.
778  */
tracefs_cpu_flush_buf(struct tracefs_cpu * tcpu)779 struct kbuffer *tracefs_cpu_flush_buf(struct tracefs_cpu *tcpu)
780 {
781 	int ret;
782 
783 	if (!get_buffer(tcpu))
784 		return NULL;
785 
786 	if (tcpu->mapping) {
787 		/* Make sure that reading is now non blocking */
788 		set_nonblock(tcpu);
789 		ret = trace_mmap_load_subbuf(tcpu->mapping, tcpu->kbuf);
790 		return ret > 0 ? tcpu->kbuf : NULL;
791 	}
792 
793 	ret = tracefs_cpu_flush(tcpu, tcpu->buffer);
794 	if (ret <= 0)
795 		return NULL;
796 
797 	kbuffer_load_subbuffer(tcpu->kbuf, tcpu->buffer);
798 	return tcpu->kbuf;
799 }
800 
801 /**
802  * tracefs_cpu_flush_write - Finish out and read the rest of the raw tracing file
803  * @tcpu: The descriptor representing the raw trace file
804  * @wfd: The write file descriptor to write the data to
805  *
806  * Reads the trace_pipe_raw file associated by the @tcpu and writes it to
807  * @wfd. This should be called at the end of tracing to get the rest of the data.
808  *
809  * Returns the number of bytes written, or negative on error.
810  */
tracefs_cpu_flush_write(struct tracefs_cpu * tcpu,int wfd)811 int tracefs_cpu_flush_write(struct tracefs_cpu *tcpu, int wfd)
812 {
813 	char buffer[tcpu->subbuf_size];
814 	int ret;
815 
816 	ret = tracefs_cpu_flush(tcpu, buffer);
817 	if (ret > 0)
818 		ret = write(wfd, buffer, ret);
819 
820 	/* It's OK if there's no data to read */
821 	if (ret < 0 && errno == EAGAIN)
822 		ret = 0;
823 
824 	return ret;
825 }
826 
/**
 * tracefs_cpu_write - Write the raw trace file into a file descriptor
 * @tcpu: The descriptor representing the raw trace file
 * @wfd: The write file descriptor to write the data to
 * @nonblock: Hint to not block on the read if there's no data.
 *
 * This will pipe the data from the trace_pipe_raw file associated with @tcpu
 * into the @wfd file descriptor. If @nonblock is set, then it will not
 * block on if there's nothing to write. Note, it will only write sub buffer
 * size data to @wfd. Calls to tracefs_cpu_flush_write() are needed to
 * write out the rest.
 *
 * Returns the number of bytes read or negative on error.
 */
int tracefs_cpu_write(struct tracefs_cpu *tcpu, int wfd, bool nonblock)
{
	char buffer[tcpu->subbuf_size];
	int mode = SPLICE_F_MOVE;
	int tot_write = 0;
	int tot;
	int ret;

	/* Mapped path: read one sub buffer and write it out directly */
	if (tcpu->mapping) {
		int r = tracefs_cpu_read(tcpu, buffer, nonblock);
		if (r < 0)
			return r;
		/* Loop to handle partial writes */
		do {
			ret = write(wfd, buffer, r);
			if (ret < 0)
				return ret;
			r -= ret;
			tot_write += ret;
		} while (r > 0);
		return tot_write;
	}

	ret = wait_on_input(tcpu, nonblock);
	if (ret <= 0)
		return ret;

	if (tcpu->flags & TC_NONBLOCK)
		mode |= SPLICE_F_NONBLOCK;

	ret = init_splice(tcpu);
	if (ret < 0)
		return ret;

	/* First stage: ring buffer -> internal splice pipe */
	tot = splice(tcpu->fd, NULL, tcpu->splice_pipe[1], NULL,
		     tcpu->pipe_size, mode);
	if (tot < 0)
		return tot;

	if (tot == 0)
		return 0;

	/* Second stage: internal splice pipe -> @wfd */
	ret = splice(tcpu->splice_pipe[0], NULL, wfd, NULL,
		     tot, SPLICE_F_MOVE | SPLICE_F_NONBLOCK);

	if (ret >= 0)
		return ret;

	/* Some file systems do not allow splicing, try writing instead */
	do {
		int r = tcpu->subbuf_size;

		if (r > tot)
			r = tot;

		/* Drain the pipe a sub buffer at a time and write it out */
		ret = read(tcpu->splice_pipe[0], buffer, r);
		if (ret > 0) {
			tot -= ret;
			ret = write(wfd, buffer, ret);
		}
		if (ret > 0)
			tot_write += ret;
	} while (ret > 0);

	if (ret < 0)
		return ret;

	return tot_write;
}
909 
910 /**
911  * tracefs_cpu_pipe - Write the raw trace file into a pipe descriptor
912  * @tcpu: The descriptor representing the raw trace file
913  * @wfd: The write file descriptor to write the data to (must be a pipe)
914  * @nonblock: Hint to not block on the read if there's no data.
915  *
916  * This will splice directly the file descriptor of the trace_pipe_raw
917  * file to the given @wfd, which must be a pipe. This can also be used
918  * if @tcpu was created with tracefs_cpu_create_fd() where the passed
919  * in @fd there was a pipe, then @wfd does not need to be a pipe.
920  *
921  * Returns the number of bytes read or negative on error.
922  */
tracefs_cpu_pipe(struct tracefs_cpu * tcpu,int wfd,bool nonblock)923 int tracefs_cpu_pipe(struct tracefs_cpu *tcpu, int wfd, bool nonblock)
924 {
925 	int mode = SPLICE_F_MOVE;
926 	int ret;
927 
928 	if (tcpu->mapping)
929 		return tracefs_cpu_write(tcpu, wfd, nonblock);
930 
931 	ret = wait_on_input(tcpu, nonblock);
932 	if (ret <= 0)
933 		return ret;
934 
935 	if (tcpu->flags & TC_NONBLOCK)
936 		mode |= SPLICE_F_NONBLOCK;
937 
938 	ret = splice(tcpu->fd, NULL, wfd, NULL,
939 		     tcpu->pipe_size, mode);
940 	return ret;
941 }
942