1 // SPDX-License-Identifier: LGPL-2.1
2 /*
3  * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
4  *
5  */
6 #define _LARGEFILE64_SOURCE
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <fcntl.h>
10 #include <time.h>
11 #include <poll.h>
12 #include <unistd.h>
13 #include <errno.h>
14 
15 #include "tracefs.h"
16 #include "trace-cmd-private.h"
17 #include "trace-cmd-local.h"
18 #include "event-utils.h"
19 
/*
 * F_GETPIPE_SZ was introduced in 2.6.35, older systems don't have it.
 * Supply the fcntl() command number here when the headers do not.
 */
#ifndef F_GETPIPE_SZ
# define F_GETPIPE_SZ	1032 /* The Linux number for the option */
#endif

/*
 * Fallback values for the splice() flag bits, used only when the
 * included headers have not already defined SPLICE_F_MOVE.
 */
#ifndef SPLICE_F_MOVE
# define SPLICE_F_MOVE		1
# define SPLICE_F_NONBLOCK	2
# define SPLICE_F_MORE		4
# define SPLICE_F_GIFT		8
#endif

/* Timeout, in milliseconds, passed to poll() ahead of a direct splice */
#define POLL_TIMEOUT_MS		1000
33 
/*
 * State of one recorder: where the trace data comes from, where it is
 * written, and the bookkeeping used while moving it.
 */
struct tracecmd_recorder {
	int		fd;		/* output currently written to (fd1 or fd2) */
	int		fd1;		/* first output descriptor */
	int		fd2;		/* second output, alternated with fd1 when max is set */
	int		trace_fd;	/* descriptor the trace data is read from */
	int		brass[2];	/* pipe sitting between the two splice() calls */
	int		pipe_size;	/* length requested from each input splice() */
	int		page_size;	/* value of getpagesize() at creation */
	int		cpu;		/* CPU this recorder was created for */
	int		stop;		/* non-zero ends the recording loop */
	int		max;		/* pages per output before switching; 0 = no limit */
	int		pages;		/* pages counted on the current output */
	int		count;		/* bytes counted toward the next page */
	unsigned int	fd_flags;	/* splice() flags when writing to fd */
	unsigned int	trace_fd_flags;	/* splice() flags when reading trace_fd */
	unsigned int	flags;		/* TRACECMD_RECORD_* flags given at creation */
};
51 
/*
 * Copy everything in @src, starting from its beginning, to the current
 * position of @dst through a temporary buffer of @size bytes.
 *
 * A short write() is completed rather than silently dropping the rest
 * of the chunk, and read()/write() calls interrupted by a signal are
 * retried.
 *
 * Returns 0 on success, or a negative value on failure (bad @size, no
 * memory, or a failed seek/read/write).
 */
static int append_file(int size, int dst, int src)
{
	char *buf;
	int ret = -1;

	if (size <= 0)
		return -1;

	buf = malloc(size);
	if (!buf)
		return -1;

	/* Without the rewind we would copy from an arbitrary offset */
	if (lseek64(src, 0, SEEK_SET) < 0)
		goto out;

	for (;;) {
		ssize_t done = 0;
		ssize_t r;

		r = read(src, buf, size);
		if (r < 0) {
			if (errno == EINTR)
				continue;
			goto out;
		}
		if (!r)
			break;

		/* Push out the whole chunk, however many write()s it takes */
		while (done < r) {
			ssize_t w = write(dst, buf + done, r - done);

			if (w < 0) {
				if (errno == EINTR)
					continue;
				goto out;
			}
			/* No progress at all: bail out instead of spinning */
			if (!w)
				goto out;
			done += w;
		}
	}
	ret = 0;
 out:
	free(buf);
	return ret;
}
70 
tracecmd_free_recorder(struct tracecmd_recorder * recorder)71 void tracecmd_free_recorder(struct tracecmd_recorder *recorder)
72 {
73 	if (!recorder)
74 		return;
75 
76 	if (recorder->max) {
77 		/* Need to put everything into fd1 */
78 		if (recorder->fd == recorder->fd1) {
79 			int ret;
80 			/*
81 			 * Crap, the older data is in fd2, and we need
82 			 * to append fd1 onto it, and then copy over to fd1
83 			 */
84 			ret = append_file(recorder->page_size,
85 					  recorder->fd2, recorder->fd1);
86 			/* Error on copying, then just keep fd1 */
87 			if (ret) {
88 				lseek64(recorder->fd1, 0, SEEK_END);
89 				goto close;
90 			}
91 			lseek64(recorder->fd1, 0, SEEK_SET);
92 			ftruncate(recorder->fd1, 0);
93 		}
94 		append_file(recorder->page_size, recorder->fd1, recorder->fd2);
95 	}
96  close:
97 	if (recorder->brass[0] >= 0)
98 		close(recorder->brass[0]);
99 
100 	if (recorder->brass[1] >= 0)
101 		close(recorder->brass[1]);
102 
103 	if (recorder->trace_fd >= 0)
104 		close(recorder->trace_fd);
105 
106 	if (recorder->fd1 >= 0)
107 		close(recorder->fd1);
108 
109 	if (recorder->fd2 >= 0)
110 		close(recorder->fd2);
111 
112 	free(recorder);
113 }
114 
set_nonblock(struct tracecmd_recorder * recorder)115 static void set_nonblock(struct tracecmd_recorder *recorder)
116 {
117 	long flags;
118 
119 	/* Do not block on reads */
120 	flags = fcntl(recorder->trace_fd, F_GETFL);
121 	fcntl(recorder->trace_fd, F_SETFL, flags | O_NONBLOCK);
122 
123 	/* Do not block on streams */
124 	recorder->fd_flags |= SPLICE_F_NONBLOCK;
125 }
126 
127 struct tracecmd_recorder *
tracecmd_create_buffer_recorder_fd2(int fd,int fd2,int cpu,unsigned flags,const char * buffer,int maxkb)128 tracecmd_create_buffer_recorder_fd2(int fd, int fd2, int cpu, unsigned flags,
129 				    const char *buffer, int maxkb)
130 {
131 	struct tracecmd_recorder *recorder;
132 	char *path = NULL;
133 	int pipe_size = 0;
134 	int ret;
135 
136 	recorder = malloc(sizeof(*recorder));
137 	if (!recorder)
138 		return NULL;
139 
140 	recorder->cpu = cpu;
141 	recorder->flags = flags;
142 
143 	recorder->fd_flags = SPLICE_F_MOVE;
144 
145 	if (!(recorder->flags & TRACECMD_RECORD_BLOCK_SPLICE))
146 		recorder->fd_flags |= SPLICE_F_NONBLOCK;
147 
148 	recorder->trace_fd_flags = SPLICE_F_MOVE;
149 
150 	/* Init to know what to free and release */
151 	recorder->trace_fd = -1;
152 	recorder->brass[0] = -1;
153 	recorder->brass[1] = -1;
154 
155 	recorder->page_size = getpagesize();
156 	if (maxkb) {
157 		int kb_per_page = recorder->page_size >> 10;
158 
159 		if (!kb_per_page)
160 			kb_per_page = 1;
161 		recorder->max = maxkb / kb_per_page;
162 		/* keep max half */
163 		recorder->max >>= 1;
164 		if (!recorder->max)
165 			recorder->max = 1;
166 	} else
167 		recorder->max = 0;
168 
169 	recorder->count = 0;
170 	recorder->pages = 0;
171 
172 	/* fd always points to what to write to */
173 	recorder->fd = fd;
174 	recorder->fd1 = fd;
175 	recorder->fd2 = fd2;
176 
177 	if (buffer) {
178 		if (flags & TRACECMD_RECORD_SNAPSHOT)
179 			ret = asprintf(&path, "%s/per_cpu/cpu%d/snapshot_raw",
180 				       buffer, cpu);
181 		else
182 			ret = asprintf(&path, "%s/per_cpu/cpu%d/trace_pipe_raw",
183 				       buffer, cpu);
184 		if (ret < 0)
185 			goto out_free;
186 
187 		recorder->trace_fd = open(path, O_RDONLY);
188 		free(path);
189 
190 		if (recorder->trace_fd < 0)
191 			goto out_free;
192 	}
193 
194 	if (!(recorder->flags & (TRACECMD_RECORD_NOSPLICE |
195 				 TRACECMD_RECORD_NOBRASS))) {
196 		ret = pipe(recorder->brass);
197 		if (ret < 0)
198 			goto out_free;
199 
200 		ret = fcntl(recorder->brass[0], F_GETPIPE_SZ, &pipe_size);
201 		/*
202 		 * F_GETPIPE_SZ was introduced in 2.6.35, ftrace was introduced
203 		 * in 2.6.31. If we are running on an older kernel, just fall
204 		 * back to using page_size for splice(). It could also return
205 		 * success, but not modify pipe_size.
206 		 */
207 		if (ret < 0 || !pipe_size)
208 			pipe_size = recorder->page_size;
209 
210 		recorder->pipe_size = pipe_size;
211 	}
212 
213 	if (recorder->flags & TRACECMD_RECORD_POLL)
214 		set_nonblock(recorder);
215 
216 	return recorder;
217 
218  out_free:
219 	tracecmd_free_recorder(recorder);
220 	return NULL;
221 }
222 
/*
 * Create a recorder writing to the single descriptor @fd: no second
 * output descriptor and no size limit.
 */
struct tracecmd_recorder *
tracecmd_create_buffer_recorder_fd(int fd, int cpu, unsigned flags, const char *buffer)
{
	const int no_fd2 = -1;
	const int no_limit_kb = 0;

	return tracecmd_create_buffer_recorder_fd2(fd, no_fd2, cpu, flags,
						   buffer, no_limit_kb);
}
228 
/*
 * Create (or truncate) @file and build a recorder that writes to it.
 *
 * Returns the recorder, or NULL on failure. When the recorder cannot be
 * created the file is closed and removed again, and errno is left as the
 * failed creation set it rather than as the cleanup calls leave it.
 */
static struct tracecmd_recorder *
__tracecmd_create_buffer_recorder(const char *file, int cpu, unsigned flags,
				  const char *buffer)
{
	struct tracecmd_recorder *recorder;
	int saved_errno;
	int fd;

	fd = open(file, O_WRONLY | O_CREAT | O_TRUNC | O_LARGEFILE, 0644);
	if (fd < 0)
		return NULL;

	recorder = tracecmd_create_buffer_recorder_fd(fd, cpu, flags, buffer);
	if (!recorder) {
		/* close() and unlink() may overwrite errno */
		saved_errno = errno;
		close(fd);
		unlink(file);
		errno = saved_errno;
	}

	return recorder;
}
248 
/*
 * Create a recorder writing to @file whose output is limited to roughly
 * @maxkb kilobytes. A second file, "<file>.1", is created to alternate
 * with @file; it is unlinked straight away and only reached through its
 * descriptor, since everything is added to @file at the end.
 *
 * With @maxkb of 0 this is tracecmd_create_buffer_recorder().
 *
 * Returns the recorder, or NULL on failure. On failure only the files
 * this call managed to create are removed, and errno is left as the
 * failing step set it.
 */
struct tracecmd_recorder *
tracecmd_create_buffer_recorder_maxkb(const char *file, int cpu, unsigned flags,
				      const char *buffer, int maxkb)
{
	struct tracecmd_recorder *recorder;
	char *file2 = NULL;
	int saved_errno;
	int fd2 = -1;
	int fd = -1;

	if (!maxkb)
		return tracecmd_create_buffer_recorder(file, cpu, flags, buffer);

	if (asprintf(&file2, "%s.1", file) < 0)
		return NULL;

	fd = open(file, O_RDWR | O_CREAT | O_TRUNC | O_LARGEFILE, 0644);
	if (fd < 0)
		goto fail;

	fd2 = open(file2, O_RDWR | O_CREAT | O_TRUNC | O_LARGEFILE, 0644);
	if (fd2 < 0)
		goto fail;

	recorder = tracecmd_create_buffer_recorder_fd2(fd, fd2, cpu, flags, buffer, maxkb);
	if (!recorder)
		goto fail;

	/* Unlink file2, we need to add everything to file at the end */
	unlink(file2);
	free(file2);

	return recorder;

 fail:
	/* The cleanup below must not hide why we failed */
	saved_errno = errno;
	if (fd2 >= 0) {
		close(fd2);
		unlink(file2);
	}
	if (fd >= 0) {
		close(fd);
		unlink(file);
	}
	free(file2);
	errno = saved_errno;

	return NULL;
}
293 
/*
 * Create a recorder that writes to a newly created @file; public entry
 * point for __tracecmd_create_buffer_recorder().
 */
struct tracecmd_recorder *
tracecmd_create_buffer_recorder(const char *file, int cpu, unsigned flags,
				const char *buffer)
{
	struct tracecmd_recorder *recorder;

	recorder = __tracecmd_create_buffer_recorder(file, cpu, flags, buffer);

	return recorder;
}
300 
301 /**
302  * tracecmd_create_recorder_virt - Create a recorder reading tracing data
303  * from the trace_fd file descriptor instead of from the local tracefs
304  * @file: output filename where tracing data will be written
305  * @cpu: which CPU is being traced
306  * @flags: flags configuring the recorder (see TRACECMD_RECORDER_* enums)
307  * @trace_fd: file descriptor from where tracing data will be read
308  */
309 struct tracecmd_recorder *
tracecmd_create_recorder_virt(const char * file,int cpu,unsigned flags,int trace_fd)310 tracecmd_create_recorder_virt(const char *file, int cpu, unsigned flags,
311 			      int trace_fd)
312 {
313 	struct tracecmd_recorder *recorder;
314 
315 	recorder = __tracecmd_create_buffer_recorder(file, cpu, flags, NULL);
316 	if (recorder)
317 		recorder->trace_fd = trace_fd;
318 
319 	return recorder;
320 }
321 
/*
 * Create a recorder writing to @fd that reads from the directory given
 * by tracefs_tracing_dir(). When that returns nothing, errno is set to
 * ENODEV and NULL is returned.
 */
struct tracecmd_recorder *tracecmd_create_recorder_fd(int fd, int cpu, unsigned flags)
{
	const char *tracing = tracefs_tracing_dir();

	if (tracing)
		return tracecmd_create_buffer_recorder_fd(fd, cpu, flags, tracing);

	errno = ENODEV;
	return NULL;
}
334 
/*
 * Create a recorder writing to @file that reads from the directory given
 * by tracefs_tracing_dir(). When that returns nothing, errno is set to
 * ENODEV and NULL is returned.
 */
struct tracecmd_recorder *tracecmd_create_recorder(const char *file, int cpu, unsigned flags)
{
	const char *tracing = tracefs_tracing_dir();

	if (tracing)
		return tracecmd_create_buffer_recorder(file, cpu, flags, tracing);

	errno = ENODEV;
	return NULL;
}
347 
/*
 * Size-limited variant of tracecmd_create_recorder(): reads from the
 * directory given by tracefs_tracing_dir() and passes @maxkb on. When no
 * tracing directory is returned, errno is set to ENODEV and NULL is
 * returned.
 */
struct tracecmd_recorder *
tracecmd_create_recorder_maxkb(const char *file, int cpu, unsigned flags, int maxkb)
{
	const char *tracing = tracefs_tracing_dir();

	if (tracing)
		return tracecmd_create_buffer_recorder_maxkb(file, cpu, flags,
							     tracing, maxkb);

	errno = ENODEV;
	return NULL;
}
361 
update_fd(struct tracecmd_recorder * recorder,int size)362 static inline void update_fd(struct tracecmd_recorder *recorder, int size)
363 {
364 	int fd;
365 
366 	if (!recorder->max)
367 		return;
368 
369 	recorder->count += size;
370 
371 	if (recorder->count >= recorder->page_size) {
372 		recorder->count = 0;
373 		recorder->pages++;
374 	}
375 
376 	if (recorder->pages < recorder->max)
377 		return;
378 
379 	recorder->pages = 0;
380 
381 	fd = recorder->fd;
382 
383 	/* Swap fd to next file. */
384 	if (fd == recorder->fd1)
385 		fd = recorder->fd2;
386 	else
387 		fd = recorder->fd1;
388 
389 	/* Zero out the new file we are writing to */
390 	lseek64(fd, 0, SEEK_SET);
391 	ftruncate(fd, 0);
392 
393 	recorder->fd = fd;
394 }
395 
396 /*
397  * Returns -1 on error.
398  *          or bytes of data read.
399  */
splice_data(struct tracecmd_recorder * recorder)400 static long splice_data(struct tracecmd_recorder *recorder)
401 {
402 	long total_read = 0;
403 	long read;
404 	long ret;
405 
406 	read = splice(recorder->trace_fd, NULL, recorder->brass[1], NULL,
407 		      recorder->pipe_size, recorder->trace_fd_flags);
408 	if (read < 0) {
409 		if (errno == EAGAIN || errno == EINTR || errno == ENOTCONN)
410 			return 0;
411 
412 		tracecmd_warning("recorder error in splice input");
413 		return -1;
414 	} else if (read == 0)
415 		return 0;
416 
417  again:
418 	ret = splice(recorder->brass[0], NULL, recorder->fd, NULL,
419 		     read, recorder->fd_flags);
420 	if (ret < 0) {
421 		if (errno != EAGAIN && errno != EINTR) {
422 			tracecmd_warning("recorder error in splice output");
423 			return -1;
424 		}
425 		return total_read;
426 	} else
427 		update_fd(recorder, ret);
428 	total_read = ret;
429 	read -= ret;
430 	if (read)
431 		goto again;
432 
433 	return total_read;
434 }
435 
436 /*
437  * Returns -1 on error.
438  *          or bytes of data read.
439  */
direct_splice_data(struct tracecmd_recorder * recorder)440 static long direct_splice_data(struct tracecmd_recorder *recorder)
441 {
442 	struct pollfd pfd = {
443 		.fd = recorder->trace_fd,
444 		.events = POLLIN,
445 	};
446 	long read;
447 	int ret;
448 
449 	/*
450 	 * splice(2) in Linux used to not check O_NONBLOCK flag of pipe file
451 	 * descriptors before [1]. To avoid getting blocked in the splice(2)
452 	 * call below after the user had requested to stop tracing, we poll(2)
453 	 * here. This poll() is not necessary on newer kernels.
454 	 *
455 	 * [1] https://github.com/torvalds/linux/commit/ee5e001196d1345b8fee25925ff5f1d67936081e
456 	 */
457 	ret = poll(&pfd, 1, POLL_TIMEOUT_MS);
458 	if (ret < 0)
459 		return -1;
460 
461 	if (!(pfd.revents | POLLIN))
462 		return 0;
463 
464 	read = splice(recorder->trace_fd, NULL, recorder->fd, NULL,
465 		      recorder->pipe_size, recorder->fd_flags);
466 	if (read < 0) {
467 		if (errno == EAGAIN || errno == EINTR || errno == ENOTCONN)
468 			return 0;
469 
470 		tracecmd_warning("recorder error in splice input");
471 		return -1;
472 	}
473 
474 	return read;
475 }
476 
477 /*
478  * Returns -1 on error.
479  *          or bytes of data read.
480  */
read_data(struct tracecmd_recorder * recorder)481 static long read_data(struct tracecmd_recorder *recorder)
482 {
483 	char buf[recorder->page_size];
484 	long left;
485 	long r, w;
486 
487 	r = read(recorder->trace_fd, buf, recorder->page_size);
488 	if (r < 0) {
489 		if (errno == EAGAIN || errno == EINTR || errno == ENOTCONN)
490 			return 0;
491 
492 		tracecmd_warning("recorder error in read input");
493 		return -1;
494 	}
495 
496 	left = r;
497 	do {
498 		w = write(recorder->fd, buf + (r - left), left);
499 		if (w > 0) {
500 			left -= w;
501 			update_fd(recorder, w);
502 		}
503 	} while (w >= 0 && left);
504 
505 	if (w < 0)
506 		r = w;
507 
508 	return r;
509 }
510 
move_data(struct tracecmd_recorder * recorder)511 static long move_data(struct tracecmd_recorder *recorder)
512 {
513 	if (recorder->flags & TRACECMD_RECORD_NOSPLICE)
514 		return read_data(recorder);
515 
516 	if (recorder->flags & TRACECMD_RECORD_NOBRASS)
517 		return direct_splice_data(recorder);
518 
519 	return splice_data(recorder);
520 }
521 
tracecmd_flush_recording(struct tracecmd_recorder * recorder)522 long tracecmd_flush_recording(struct tracecmd_recorder *recorder)
523 {
524 	char buf[recorder->page_size];
525 	long total = 0;
526 	long wrote = 0;
527 	long ret;
528 
529 	set_nonblock(recorder);
530 
531 	do {
532 		ret = move_data(recorder);
533 		if (ret < 0)
534 			return ret;
535 		total += ret;
536 	} while (ret);
537 
538 	/* splice only reads full pages */
539 	do {
540 		ret = read(recorder->trace_fd, buf, recorder->page_size);
541 		if (ret > 0) {
542 			write(recorder->fd, buf, ret);
543 			wrote += ret;
544 		}
545 
546 	} while (ret > 0);
547 
548 	/* Make sure we finish off with a page size boundary */
549 	wrote &= recorder->page_size - 1;
550 	if (wrote) {
551 		memset(buf, 0, recorder->page_size);
552 		write(recorder->fd, buf, recorder->page_size - wrote);
553 		total += recorder->page_size;
554 	}
555 
556 	return total;
557 }
558 
tracecmd_start_recording(struct tracecmd_recorder * recorder,unsigned long sleep)559 int tracecmd_start_recording(struct tracecmd_recorder *recorder, unsigned long sleep)
560 {
561 	struct timespec req = {
562 		.tv_sec = sleep / 1000000,
563 		.tv_nsec = (sleep % 1000000) * 1000,
564 	};
565 	long read = 1;
566 	long ret;
567 
568 	recorder->stop = 0;
569 
570 	do {
571 		/* Only sleep if we did not read anything last time */
572 		if (!read && sleep)
573 			nanosleep(&req, NULL);
574 
575 		read = 0;
576 		do {
577 			ret = move_data(recorder);
578 			if (ret < 0)
579 				return ret;
580 			read += ret;
581 		} while (ret);
582 	} while (!recorder->stop);
583 
584 	/* Flush out the rest */
585 	ret = tracecmd_flush_recording(recorder);
586 
587 	if (ret < 0)
588 		return ret;
589 
590 	return 0;
591 }
592 
tracecmd_stop_recording(struct tracecmd_recorder * recorder)593 void tracecmd_stop_recording(struct tracecmd_recorder *recorder)
594 {
595 	if (!recorder)
596 		return;
597 
598 	set_nonblock(recorder);
599 
600 	recorder->stop = 1;
601 }
602