/*
 * GPL HEADER START
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License version 2 for more details (a copy is included
 * in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; If not, see
 * http://www.gnu.org/licenses/gpl-2.0.html
 *
 * GPL HEADER END
 */
/*
 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
 * Use is subject to license terms.
 *
 * Copyright (c) 2012, Intel Corporation.
 */
/*
 * This file is part of Lustre, http://www.lustre.org/
 * Lustre is a trademark of Sun Microsystems, Inc.
 *
 * libcfs/libcfs/tracefile.c
 *
 * Author: Zach Brown <zab@clusterfs.com>
 * Author: Phil Schwan <phil@clusterfs.com>
 */

#define DEBUG_SUBSYSTEM S_LNET
#define LUSTRE_TRACEFILE_PRIVATE
#include "tracefile.h"

#include "../../include/linux/libcfs/libcfs.h"

/* XXX move things up to the top, comment */
union cfs_trace_data_union (*cfs_trace_data[TCD_MAX_TYPES])[NR_CPUS] __cacheline_aligned;

char cfs_tracefile[TRACEFILE_NAME_SIZE];
long long cfs_tracefile_size = CFS_TRACEFILE_SIZE;
static struct tracefiled_ctl trace_tctl;
static DEFINE_MUTEX(cfs_trace_thread_mutex);
static int thread_running;

static atomic_t cfs_tage_allocated = ATOMIC_INIT(0);

struct page_collection {
	struct list_head	pc_pages;
	/*
	 * if this flag is set, collect_pages() will spill both
	 * ->tcd_daemon_pages and ->tcd_pages to the ->pc_pages. Otherwise,
	 * only ->tcd_pages are spilled.
	 */
	int			pc_want_daemon_pages;
};

struct tracefiled_ctl {
	struct completion	tctl_start;
	struct completion	tctl_stop;
	wait_queue_head_t	tctl_waitq;
	pid_t			tctl_pid;
	atomic_t		tctl_shutdown;
};

/*
 * small data-structure for each page owned by tracefiled.
 */
struct cfs_trace_page {
	/*
	 * page itself
	 */
	struct page		*page;
	/*
	 * linkage into one of the lists in trace_data_union or
	 * page_collection
	 */
	struct list_head	linkage;
	/*
	 * number of bytes used within this page
	 */
	unsigned int		used;
	/*
	 * cpu that owns this page
	 */
	unsigned short		cpu;
	/*
	 * type(context) of this page
	 */
	unsigned short		type;
};

static void put_pages_on_tcd_daemon_list(struct page_collection *pc,
					 struct cfs_trace_cpu_data *tcd);

static inline struct cfs_trace_page *
cfs_tage_from_list(struct list_head *list)
{
	return list_entry(list, struct cfs_trace_page, linkage);
}

static struct cfs_trace_page *cfs_tage_alloc(gfp_t gfp)
{
	struct page	      *page;
	struct cfs_trace_page *tage;

	/* My caller is trying to free memory */
	if (!in_interrupt() && memory_pressure_get())
		return NULL;

	/*
	 * Don't spam console with allocation failures: they will be reported
	 * by upper layer anyway.
	 */
	gfp |= __GFP_NOWARN;
	page = alloc_page(gfp);
	if (!page)
		return NULL;

	tage = kmalloc(sizeof(*tage), gfp);
	if (!tage) {
		__free_page(page);
		return NULL;
	}

	tage->page = page;
	atomic_inc(&cfs_tage_allocated);
	return tage;
}

static void cfs_tage_free(struct cfs_trace_page *tage)
{
	__free_page(tage->page);
	kfree(tage);
	atomic_dec(&cfs_tage_allocated);
}

static void cfs_tage_to_tail(struct cfs_trace_page *tage,
			     struct list_head *queue)
{
	list_move_tail(&tage->linkage, queue);
}

int cfs_trace_refill_stock(struct cfs_trace_cpu_data *tcd, gfp_t gfp,
			   struct list_head *stock)
{
	int i;

	/*
	 * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
	 * from here: this will lead to infinite recursion.
	 */

	for (i = 0; i + tcd->tcd_cur_stock_pages < TCD_STOCK_PAGES; ++i) {
		struct cfs_trace_page *tage;

		tage = cfs_tage_alloc(gfp);
		if (!tage)
			break;
		list_add_tail(&tage->linkage, stock);
	}
	return i;
}
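
/*
 * Illustrative usage sketch (an assumed caller, not part of this file):
 * top up a CPU's stock with pages allocated while GFP_KERNEL is still
 * safe, so cfs_trace_get_tage_try() can later draw on the stock instead
 * of falling back to GFP_ATOMIC:
 *
 *	LIST_HEAD(stock);
 *	int added = cfs_trace_refill_stock(tcd, GFP_KERNEL, &stock);
 *
 * 'added' pages now sit on 'stock'; the caller is assumed to splice them
 * onto tcd->tcd_stock_pages and bump tcd->tcd_cur_stock_pages under the
 * appropriate tcd lock.
 */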

/* return a page that has 'len' bytes left at the end */
static struct cfs_trace_page *
cfs_trace_get_tage_try(struct cfs_trace_cpu_data *tcd, unsigned long len)
{
	struct cfs_trace_page *tage;

	if (tcd->tcd_cur_pages > 0) {
		__LASSERT(!list_empty(&tcd->tcd_pages));
		tage = cfs_tage_from_list(tcd->tcd_pages.prev);
		if (tage->used + len <= PAGE_SIZE)
			return tage;
	}

	if (tcd->tcd_cur_pages < tcd->tcd_max_pages) {
		if (tcd->tcd_cur_stock_pages > 0) {
			tage = cfs_tage_from_list(tcd->tcd_stock_pages.prev);
			--tcd->tcd_cur_stock_pages;
			list_del_init(&tage->linkage);
		} else {
			tage = cfs_tage_alloc(GFP_ATOMIC);
			if (unlikely(!tage)) {
				if ((!memory_pressure_get() ||
				     in_interrupt()) && printk_ratelimit())
					printk(KERN_WARNING
					       "cannot allocate a tage (%ld)\n",
					       tcd->tcd_cur_pages);
				return NULL;
			}
		}

		tage->used = 0;
		tage->cpu = smp_processor_id();
		tage->type = tcd->tcd_type;
		list_add_tail(&tage->linkage, &tcd->tcd_pages);
		tcd->tcd_cur_pages++;

		if (tcd->tcd_cur_pages > 8 && thread_running) {
			struct tracefiled_ctl *tctl = &trace_tctl;
			/*
			 * wake up tracefiled to process some pages.
			 */
			wake_up(&tctl->tctl_waitq);
		}
		return tage;
	}
	return NULL;
}

static void cfs_tcd_shrink(struct cfs_trace_cpu_data *tcd)
{
	int pgcount = tcd->tcd_cur_pages / 10;
	struct page_collection pc;
	struct cfs_trace_page *tage;
	struct cfs_trace_page *tmp;

	/*
	 * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
	 * from here: this will lead to infinite recursion.
	 */

	if (printk_ratelimit())
		printk(KERN_WARNING "debug daemon buffer overflowed; discarding 10%% of pages (%d of %ld)\n",
		       pgcount + 1, tcd->tcd_cur_pages);

	INIT_LIST_HEAD(&pc.pc_pages);

	list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages, linkage) {
		if (pgcount-- == 0)
			break;

		list_move_tail(&tage->linkage, &pc.pc_pages);
		tcd->tcd_cur_pages--;
	}
	put_pages_on_tcd_daemon_list(&pc, tcd);
}

/* return a page that has 'len' bytes left at the end */
static struct cfs_trace_page *cfs_trace_get_tage(struct cfs_trace_cpu_data *tcd,
						 unsigned long len)
{
	struct cfs_trace_page *tage;

	/*
	 * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
	 * from here: this will lead to infinite recursion.
	 */

	if (len > PAGE_SIZE) {
		pr_err("cowardly refusing to write %lu bytes in a page\n", len);
		return NULL;
	}

	tage = cfs_trace_get_tage_try(tcd, len);
	if (tage)
		return tage;
	if (thread_running)
		cfs_tcd_shrink(tcd);
	if (tcd->tcd_cur_pages > 0) {
		tage = cfs_tage_from_list(tcd->tcd_pages.next);
		tage->used = 0;
		cfs_tage_to_tail(tage, &tcd->tcd_pages);
	}
	return tage;
}

int libcfs_debug_msg(struct libcfs_debug_msg_data *msgdata,
		     const char *format, ...)
{
	va_list args;
	int	rc;

	va_start(args, format);
	rc = libcfs_debug_vmsg2(msgdata, format, args, NULL);
	va_end(args);

	return rc;
}
EXPORT_SYMBOL(libcfs_debug_msg);
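
/*
 * Usage sketch (illustrative only): debug macros such as CDEBUG build a
 * struct libcfs_debug_msg_data describing the call site and hand it to
 * libcfs_debug_msg().  A hypothetical open-coded caller might look like:
 *
 *	LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_TRACE, NULL);
 *	libcfs_debug_msg(&msgdata, "rc = %d\n", rc);
 *
 * LIBCFS_DEBUG_MSG_DATA_DECL is assumed here to be the call-site
 * declaration helper used by the CDEBUG macros in libcfs_debug.h.
 */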

int libcfs_debug_vmsg2(struct libcfs_debug_msg_data *msgdata,
		       const char *format1, va_list args,
		       const char *format2, ...)
{
	struct cfs_trace_cpu_data *tcd = NULL;
	struct ptldebug_header	   header = {0};
	struct cfs_trace_page	  *tage;
	/* string_buf is used only if tcd != NULL, and is always set then */
	char			  *string_buf = NULL;
	char			  *debug_buf;
	int			   known_size;
	int			   needed = 85; /* average message length */
	int			   max_nob;
	va_list			   ap;
	int			   depth;
	int			   i;
	int			   remain;
	int			   mask = msgdata->msg_mask;
	const char		  *file = kbasename(msgdata->msg_file);
	struct cfs_debug_limit_state *cdls = msgdata->msg_cdls;

	tcd = cfs_trace_get_tcd();

	/* cfs_trace_get_tcd() grabs a lock, which disables preemption and
	 * pins us to a particular CPU.  This avoids an smp_processor_id()
	 * warning on Linux when debugging is enabled.
	 */
	cfs_set_ptldebug_header(&header, msgdata, CDEBUG_STACK());

	if (!tcd)		/* arch may not log in IRQ context */
		goto console;

	if (tcd->tcd_cur_pages == 0)
		header.ph_flags |= PH_FLAG_FIRST_RECORD;

	if (tcd->tcd_shutting_down) {
		cfs_trace_put_tcd(tcd);
		tcd = NULL;
		goto console;
	}

	depth = __current_nesting_level();
	known_size = strlen(file) + 1 + depth;
	if (msgdata->msg_fn)
		known_size += strlen(msgdata->msg_fn) + 1;

	if (libcfs_debug_binary)
		known_size += sizeof(header);

	/*
	 * Try at most twice: vsnprintf() returns the real size required for
	 * the output _without_ the terminating NUL, so if 'needed' was too
	 * small for this format on the first pass, retry once with the
	 * corrected size.
	 */
	for (i = 0; i < 2; i++) {
		tage = cfs_trace_get_tage(tcd, needed + known_size + 1);
		if (!tage) {
			if (needed + known_size > PAGE_SIZE)
				mask |= D_ERROR;

			cfs_trace_put_tcd(tcd);
			tcd = NULL;
			goto console;
		}

		string_buf = (char *)page_address(tage->page) +
					tage->used + known_size;

		max_nob = PAGE_SIZE - tage->used - known_size;
		if (max_nob <= 0) {
			printk(KERN_EMERG "negative max_nob: %d\n",
			       max_nob);
			mask |= D_ERROR;
			cfs_trace_put_tcd(tcd);
			tcd = NULL;
			goto console;
		}

		needed = 0;
		if (format1) {
			va_copy(ap, args);
			needed = vsnprintf(string_buf, max_nob, format1, ap);
			va_end(ap);
		}

		if (format2) {
			remain = max_nob - needed;
			if (remain < 0)
				remain = 0;

			va_start(ap, format2);
			needed += vsnprintf(string_buf + needed, remain,
					    format2, ap);
			va_end(ap);
		}

		if (needed < max_nob) /* well. printing ok.. */
			break;
	}

	if (*(string_buf + needed - 1) != '\n')
		printk(KERN_INFO "format at %s:%d:%s doesn't end in newline\n",
		       file, msgdata->msg_line, msgdata->msg_fn);

	header.ph_len = known_size + needed;
	debug_buf = (char *)page_address(tage->page) + tage->used;

	if (libcfs_debug_binary) {
		memcpy(debug_buf, &header, sizeof(header));
		tage->used += sizeof(header);
		debug_buf += sizeof(header);
	}

	/* indent message according to the nesting level */
	while (depth-- > 0) {
		*(debug_buf++) = '.';
		++tage->used;
	}

	strcpy(debug_buf, file);
	tage->used += strlen(file) + 1;
	debug_buf += strlen(file) + 1;

	if (msgdata->msg_fn) {
		strcpy(debug_buf, msgdata->msg_fn);
		tage->used += strlen(msgdata->msg_fn) + 1;
		debug_buf += strlen(msgdata->msg_fn) + 1;
	}

	__LASSERT(debug_buf == string_buf);

	tage->used += needed;
	__LASSERT(tage->used <= PAGE_SIZE);

console:
	if ((mask & libcfs_printk) == 0) {
		/* no console output requested */
		if (tcd)
			cfs_trace_put_tcd(tcd);
		return 1;
	}

	if (cdls) {
		if (libcfs_console_ratelimit &&
		    cdls->cdls_next != 0 &&	/* not first time ever */
		    !cfs_time_after(cfs_time_current(), cdls->cdls_next)) {
			/* skipping a console message */
			cdls->cdls_count++;
			if (tcd)
				cfs_trace_put_tcd(tcd);
			return 1;
		}

		if (cfs_time_after(cfs_time_current(),
				   cdls->cdls_next + libcfs_console_max_delay +
				   cfs_time_seconds(10))) {
			/* last timeout was a long time ago */
			cdls->cdls_delay /= libcfs_console_backoff * 4;
		} else {
			cdls->cdls_delay *= libcfs_console_backoff;
		}

		if (cdls->cdls_delay < libcfs_console_min_delay)
			cdls->cdls_delay = libcfs_console_min_delay;
		else if (cdls->cdls_delay > libcfs_console_max_delay)
			cdls->cdls_delay = libcfs_console_max_delay;

		/* ensure cdls_next is never zero after it's been seen */
		cdls->cdls_next = (cfs_time_current() + cdls->cdls_delay) | 1;
	}

	if (tcd) {
		cfs_print_to_console(&header, mask, string_buf, needed, file,
				     msgdata->msg_fn);
		cfs_trace_put_tcd(tcd);
	} else {
		string_buf = cfs_trace_get_console_buffer();

		needed = 0;
		if (format1) {
			va_copy(ap, args);
			needed = vsnprintf(string_buf,
					   CFS_TRACE_CONSOLE_BUFFER_SIZE,
					   format1, ap);
			va_end(ap);
		}
		if (format2) {
			remain = CFS_TRACE_CONSOLE_BUFFER_SIZE - needed;
			if (remain > 0) {
				va_start(ap, format2);
				needed += vsnprintf(string_buf + needed, remain,
						    format2, ap);
				va_end(ap);
			}
		}
		cfs_print_to_console(&header, mask,
				     string_buf, needed, file, msgdata->msg_fn);

		put_cpu();
	}

	if (cdls && cdls->cdls_count != 0) {
		string_buf = cfs_trace_get_console_buffer();

		needed = snprintf(string_buf, CFS_TRACE_CONSOLE_BUFFER_SIZE,
				  "Skipped %d previous similar message%s\n",
				  cdls->cdls_count,
				  (cdls->cdls_count > 1) ? "s" : "");

		cfs_print_to_console(&header, mask,
				     string_buf, needed, file, msgdata->msg_fn);

		put_cpu();
		cdls->cdls_count = 0;
	}

	return 0;
}
EXPORT_SYMBOL(libcfs_debug_vmsg2);

void
cfs_trace_assertion_failed(const char *str,
			   struct libcfs_debug_msg_data *msgdata)
{
	struct ptldebug_header hdr;

	libcfs_panic_in_progress = 1;
	libcfs_catastrophe = 1;
	mb();

	cfs_set_ptldebug_header(&hdr, msgdata, CDEBUG_STACK());

	cfs_print_to_console(&hdr, D_EMERG, str, strlen(str),
			     msgdata->msg_file, msgdata->msg_fn);

	panic("Lustre debug assertion failure\n");

	/* not reached */
}

static void
panic_collect_pages(struct page_collection *pc)
{
	/* Do the collect_pages job on a single CPU: assumes that all other
	 * CPUs have been stopped during a panic.  If this isn't true for some
	 * arch, this will have to be implemented separately in each arch.
	 */
	int			   i;
	int			   j;
	struct cfs_trace_cpu_data *tcd;

	INIT_LIST_HEAD(&pc->pc_pages);

	cfs_tcd_for_each(tcd, i, j) {
		list_splice_init(&tcd->tcd_pages, &pc->pc_pages);
		tcd->tcd_cur_pages = 0;

		if (pc->pc_want_daemon_pages) {
			list_splice_init(&tcd->tcd_daemon_pages, &pc->pc_pages);
			tcd->tcd_cur_daemon_pages = 0;
		}
	}
}

static void collect_pages_on_all_cpus(struct page_collection *pc)
{
	struct cfs_trace_cpu_data *tcd;
	int i, cpu;

	for_each_possible_cpu(cpu) {
		cfs_tcd_for_each_type_lock(tcd, i, cpu) {
			list_splice_init(&tcd->tcd_pages, &pc->pc_pages);
			tcd->tcd_cur_pages = 0;
			if (pc->pc_want_daemon_pages) {
				list_splice_init(&tcd->tcd_daemon_pages,
						 &pc->pc_pages);
				tcd->tcd_cur_daemon_pages = 0;
			}
		}
	}
}

static void collect_pages(struct page_collection *pc)
{
	INIT_LIST_HEAD(&pc->pc_pages);

	if (libcfs_panic_in_progress)
		panic_collect_pages(pc);
	else
		collect_pages_on_all_cpus(pc);
}

static void put_pages_back_on_all_cpus(struct page_collection *pc)
{
	struct cfs_trace_cpu_data *tcd;
	struct list_head *cur_head;
	struct cfs_trace_page *tage;
	struct cfs_trace_page *tmp;
	int i, cpu;

	for_each_possible_cpu(cpu) {
		cfs_tcd_for_each_type_lock(tcd, i, cpu) {
			cur_head = tcd->tcd_pages.next;

			list_for_each_entry_safe(tage, tmp, &pc->pc_pages,
						 linkage) {
				__LASSERT_TAGE_INVARIANT(tage);

				if (tage->cpu != cpu || tage->type != i)
					continue;

				cfs_tage_to_tail(tage, cur_head);
				tcd->tcd_cur_pages++;
			}
		}
	}
}

static void put_pages_back(struct page_collection *pc)
{
	if (!libcfs_panic_in_progress)
		put_pages_back_on_all_cpus(pc);
}

/* Add pages to a per-cpu debug daemon ringbuffer.  This buffer makes sure that
 * we have a good amount of data at all times for dumping during an LBUG, even
 * if we have been steadily writing (and otherwise discarding) pages via the
 * debug daemon.
 */
static void put_pages_on_tcd_daemon_list(struct page_collection *pc,
					 struct cfs_trace_cpu_data *tcd)
{
	struct cfs_trace_page *tage;
	struct cfs_trace_page *tmp;

	list_for_each_entry_safe(tage, tmp, &pc->pc_pages, linkage) {
		__LASSERT_TAGE_INVARIANT(tage);

		if (tage->cpu != tcd->tcd_cpu || tage->type != tcd->tcd_type)
			continue;

		cfs_tage_to_tail(tage, &tcd->tcd_daemon_pages);
		tcd->tcd_cur_daemon_pages++;

		if (tcd->tcd_cur_daemon_pages > tcd->tcd_max_pages) {
			struct cfs_trace_page *victim;

			__LASSERT(!list_empty(&tcd->tcd_daemon_pages));
			victim = cfs_tage_from_list(tcd->tcd_daemon_pages.next);

			__LASSERT_TAGE_INVARIANT(victim);

			list_del(&victim->linkage);
			cfs_tage_free(victim);
			tcd->tcd_cur_daemon_pages--;
		}
	}
}

static void put_pages_on_daemon_list(struct page_collection *pc)
{
	struct cfs_trace_cpu_data *tcd;
	int i, cpu;

	for_each_possible_cpu(cpu) {
		cfs_tcd_for_each_type_lock(tcd, i, cpu)
			put_pages_on_tcd_daemon_list(pc, tcd);
	}
}

void cfs_trace_debug_print(void)
{
	struct page_collection pc;
	struct cfs_trace_page *tage;
	struct cfs_trace_page *tmp;

	pc.pc_want_daemon_pages = 1;
	collect_pages(&pc);
	list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
		char *p, *file, *fn;
		struct page *page;

		__LASSERT_TAGE_INVARIANT(tage);

		page = tage->page;
		p = page_address(page);
		while (p < ((char *)page_address(page) + tage->used)) {
			struct ptldebug_header *hdr;
			int len;

			hdr = (void *)p;
			p += sizeof(*hdr);
			file = p;
			p += strlen(file) + 1;
			fn = p;
			p += strlen(fn) + 1;
			len = hdr->ph_len - (int)(p - (char *)hdr);

			cfs_print_to_console(hdr, D_EMERG, p, len, file, fn);

			p += len;
		}

		list_del(&tage->linkage);
		cfs_tage_free(tage);
	}
}

int cfs_tracefile_dump_all_pages(char *filename)
{
	struct page_collection	pc;
	struct file	       *filp;
	struct cfs_trace_page  *tage;
	struct cfs_trace_page  *tmp;
	char		       *buf;
	mm_segment_t __oldfs;
	int rc;

	cfs_tracefile_write_lock();

	filp = filp_open(filename, O_CREAT | O_EXCL | O_WRONLY | O_LARGEFILE,
			 0600);
	if (IS_ERR(filp)) {
		rc = PTR_ERR(filp);
		filp = NULL;
		pr_err("LustreError: can't open %s for dump: rc %d\n",
		       filename, rc);
		goto out;
	}

	pc.pc_want_daemon_pages = 1;
	collect_pages(&pc);
	if (list_empty(&pc.pc_pages)) {
		rc = 0;
		goto close;
	}
	__oldfs = get_fs();
	set_fs(get_ds());

	/* ok, for now, just write the pages.  in the future we'll be building
	 * iobufs with the pages and calling generic_direct_IO
	 */
	list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
		__LASSERT_TAGE_INVARIANT(tage);

		buf = kmap(tage->page);
		rc = vfs_write(filp, (__force const char __user *)buf,
			       tage->used, &filp->f_pos);
		kunmap(tage->page);

		if (rc != (int)tage->used) {
			printk(KERN_WARNING "wanted to write %u but wrote %d\n",
			       tage->used, rc);
			put_pages_back(&pc);
			__LASSERT(list_empty(&pc.pc_pages));
			break;
		}
		list_del(&tage->linkage);
		cfs_tage_free(tage);
	}
	set_fs(__oldfs);
	rc = vfs_fsync(filp, 1);
	if (rc)
		pr_err("sync returns %d\n", rc);
close:
	filp_close(filp, NULL);
out:
	cfs_tracefile_write_unlock();
	return rc;
}

void cfs_trace_flush_pages(void)
{
	struct page_collection pc;
	struct cfs_trace_page *tage;
	struct cfs_trace_page *tmp;

	pc.pc_want_daemon_pages = 1;
	collect_pages(&pc);
	list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
		__LASSERT_TAGE_INVARIANT(tage);

		list_del(&tage->linkage);
		cfs_tage_free(tage);
	}
}

int cfs_trace_copyin_string(char *knl_buffer, int knl_buffer_nob,
			    const char __user *usr_buffer, int usr_buffer_nob)
{
	int nob;

	if (usr_buffer_nob > knl_buffer_nob)
		return -EOVERFLOW;

	if (copy_from_user((void *)knl_buffer,
			   usr_buffer, usr_buffer_nob))
		return -EFAULT;

	nob = strnlen(knl_buffer, usr_buffer_nob);
	/* pre-decrement so an empty string never reads knl_buffer[-1] */
	while (--nob >= 0)		/* strip trailing whitespace */
		if (!isspace(knl_buffer[nob]))
			break;

	if (nob < 0)			/* empty string */
		return -EINVAL;

	if (nob == knl_buffer_nob)	/* no space to terminate */
		return -EOVERFLOW;

	knl_buffer[nob + 1] = 0;	/* terminate */
	return 0;
}
EXPORT_SYMBOL(cfs_trace_copyin_string);
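
/*
 * Illustrative example (assumed caller, not part of this file): copying a
 * user-supplied command of at most 80 bytes into a stack buffer, with
 * trailing whitespace stripped and NUL termination guaranteed:
 *
 *	char cmd[80];
 *
 *	if (cfs_trace_copyin_string(cmd, sizeof(cmd), usr_str, usr_nob) == 0)
 *		... 'cmd' is now a clean, NUL-terminated string ...
 */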

int cfs_trace_copyout_string(char __user *usr_buffer, int usr_buffer_nob,
			     const char *knl_buffer, char *append)
{
	/*
	 * NB if 'append' != NULL, it's a single character to append to the
	 * copied out string - usually "\n" or "" (i.e. a terminating zero byte)
	 */
	int nob = strlen(knl_buffer);

	if (nob > usr_buffer_nob)
		nob = usr_buffer_nob;

	if (copy_to_user(usr_buffer, knl_buffer, nob))
		return -EFAULT;

	if (append && nob < usr_buffer_nob) {
		if (copy_to_user(usr_buffer + nob, append, 1))
			return -EFAULT;

		nob++;
	}

	return nob;
}
EXPORT_SYMBOL(cfs_trace_copyout_string);
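
/*
 * Illustrative example (assumed caller): a read handler returning a kernel
 * string to userspace with a trailing newline; on success the return value
 * is the number of bytes copied out, including the appended character:
 *
 *	rc = cfs_trace_copyout_string(buffer, count, "tracing on", "\n");
 *	if (rc < 0)
 *		return rc;	/- otherwise rc bytes were copied -/
 */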

int cfs_trace_allocate_string_buffer(char **str, int nob)
{
	if (nob > 2 * PAGE_SIZE)	/* string must be "sensible" */
		return -EINVAL;

	*str = kmalloc(nob, GFP_KERNEL | __GFP_ZERO);
	if (!*str)
		return -ENOMEM;

	return 0;
}

int cfs_trace_dump_debug_buffer_usrstr(void __user *usr_str, int usr_str_nob)
{
	char *str;
	int   rc;

	rc = cfs_trace_allocate_string_buffer(&str, usr_str_nob + 1);
	if (rc != 0)
		return rc;

	rc = cfs_trace_copyin_string(str, usr_str_nob + 1,
				     usr_str, usr_str_nob);
	if (rc != 0)
		goto out;

	if (str[0] != '/') {
		rc = -EINVAL;
		goto out;
	}
	rc = cfs_tracefile_dump_all_pages(str);
out:
	kfree(str);
	return rc;
}

int cfs_trace_daemon_command(char *str)
{
	int rc = 0;

	cfs_tracefile_write_lock();

	if (strcmp(str, "stop") == 0) {
		cfs_tracefile_write_unlock();
		cfs_trace_stop_thread();
		cfs_tracefile_write_lock();
		memset(cfs_tracefile, 0, sizeof(cfs_tracefile));

	} else if (strncmp(str, "size=", 5) == 0) {
		unsigned long tmp;

		rc = kstrtoul(str + 5, 10, &tmp);
		if (!rc) {
			if (tmp < 10 || tmp > 20480)
				cfs_tracefile_size = CFS_TRACEFILE_SIZE;
			else
				cfs_tracefile_size = tmp << 20;
		}
	} else if (strlen(str) >= sizeof(cfs_tracefile)) {
		rc = -ENAMETOOLONG;
	} else if (str[0] != '/') {
		rc = -EINVAL;
	} else {
		strcpy(cfs_tracefile, str);

		printk(KERN_INFO
		       "Lustre: debug daemon will attempt to start writing to %s (%lukB max)\n",
		       cfs_tracefile,
		       (long)(cfs_tracefile_size >> 10));

		cfs_trace_start_thread();
	}

	cfs_tracefile_write_unlock();
	return rc;
}
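
/*
 * Command summary (derived from the branches above):
 *
 *	"stop"		- shut the daemon down and forget the trace file
 *	"size=<n>"	- set the maximum trace file size to <n> MB; values
 *			  outside 10..20480 fall back to CFS_TRACEFILE_SIZE
 *	"/abs/path"	- start the daemon writing to that file
 *
 * e.g. a hypothetical caller: cfs_trace_daemon_command("size=64");
 */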

int cfs_trace_daemon_command_usrstr(void __user *usr_str, int usr_str_nob)
{
	char *str;
	int   rc;

	rc = cfs_trace_allocate_string_buffer(&str, usr_str_nob + 1);
	if (rc != 0)
		return rc;

	rc = cfs_trace_copyin_string(str, usr_str_nob + 1,
				     usr_str, usr_str_nob);
	if (rc == 0)
		rc = cfs_trace_daemon_command(str);

	kfree(str);
	return rc;
}

int cfs_trace_set_debug_mb(int mb)
{
	int i;
	int j;
	int pages;
	int limit = cfs_trace_max_debug_mb();
	struct cfs_trace_cpu_data *tcd;

	if (mb < num_possible_cpus()) {
		printk(KERN_WARNING
		       "Lustre: %d MB is too small for debug buffer size, setting it to %d MB.\n",
		       mb, num_possible_cpus());
		mb = num_possible_cpus();
	}

	if (mb > limit) {
		printk(KERN_WARNING
		       "Lustre: %d MB is too large for debug buffer size, setting it to %d MB.\n",
		       mb, limit);
		mb = limit;
	}

	mb /= num_possible_cpus();
	pages = mb << (20 - PAGE_SHIFT);

	cfs_tracefile_write_lock();

	cfs_tcd_for_each(tcd, i, j)
		tcd->tcd_max_pages = (pages * tcd->tcd_pages_factor) / 100;

	cfs_tracefile_write_unlock();

	return 0;
}
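
/*
 * Worked example of the sizing arithmetic above (illustrative numbers):
 * with mb = 256, 8 possible CPUs, 4 KiB pages (PAGE_SHIFT = 12) and a
 * tcd_pages_factor of 100, each CPU gets 256 / 8 = 32 MB, i.e.
 * 32 << (20 - 12) = 8192 pages, so tcd_max_pages = (8192 * 100) / 100.
 */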

int cfs_trace_get_debug_mb(void)
{
	int i;
	int j;
	struct cfs_trace_cpu_data *tcd;
	int total_pages = 0;

	cfs_tracefile_read_lock();

	cfs_tcd_for_each(tcd, i, j)
		total_pages += tcd->tcd_max_pages;

	cfs_tracefile_read_unlock();

	return (total_pages >> (20 - PAGE_SHIFT)) + 1;
}

static int tracefiled(void *arg)
{
	struct page_collection pc;
	struct tracefiled_ctl *tctl = arg;
	struct cfs_trace_page *tage;
	struct cfs_trace_page *tmp;
	mm_segment_t __oldfs;
	struct file *filp;
	char *buf;
	int last_loop = 0;
	int rc;

	/* we're started late enough that we pick up init's fs context */
	/* this is so broken in uml?  what on earth is going on? */

	complete(&tctl->tctl_start);

	while (1) {
		wait_queue_t __wait;

		pc.pc_want_daemon_pages = 0;
		collect_pages(&pc);
		if (list_empty(&pc.pc_pages))
			goto end_loop;

		filp = NULL;
		cfs_tracefile_read_lock();
		if (cfs_tracefile[0] != 0) {
			filp = filp_open(cfs_tracefile,
					 O_CREAT | O_RDWR | O_LARGEFILE,
					 0600);
			if (IS_ERR(filp)) {
				rc = PTR_ERR(filp);
				filp = NULL;
				printk(KERN_WARNING "couldn't open %s: %d\n",
				       cfs_tracefile, rc);
			}
		}
		cfs_tracefile_read_unlock();
		if (!filp) {
			put_pages_on_daemon_list(&pc);
			__LASSERT(list_empty(&pc.pc_pages));
			goto end_loop;
		}
		__oldfs = get_fs();
		set_fs(get_ds());

		list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
			static loff_t f_pos;

			__LASSERT_TAGE_INVARIANT(tage);

			if (f_pos >= (off_t)cfs_tracefile_size)
				f_pos = 0;
			else if (f_pos > i_size_read(file_inode(filp)))
				f_pos = i_size_read(file_inode(filp));

			buf = kmap(tage->page);
			rc = vfs_write(filp, (__force const char __user *)buf,
				       tage->used, &f_pos);
			kunmap(tage->page);

			if (rc != (int)tage->used) {
				printk(KERN_WARNING "wanted to write %u but wrote %d\n",
				       tage->used, rc);
				put_pages_back(&pc);
				__LASSERT(list_empty(&pc.pc_pages));
				break;
			}
		}
		set_fs(__oldfs);

		filp_close(filp, NULL);
		put_pages_on_daemon_list(&pc);
		if (!list_empty(&pc.pc_pages)) {
			int i;

			printk(KERN_ALERT "Lustre: trace pages aren't empty\n");
			pr_err("total cpus(%d): ", num_possible_cpus());
			for (i = 0; i < num_possible_cpus(); i++)
				if (cpu_online(i))
					pr_cont("%d(on) ", i);
				else
					pr_cont("%d(off) ", i);
			pr_cont("\n");

			i = 0;
			list_for_each_entry_safe(tage, tmp, &pc.pc_pages,
						 linkage)
				pr_err("page %d belongs to cpu %d\n",
				       ++i, tage->cpu);
			pr_err("There are %d pages unwritten\n", i);
		}
		__LASSERT(list_empty(&pc.pc_pages));
end_loop:
		if (atomic_read(&tctl->tctl_shutdown)) {
			if (last_loop == 0) {
				last_loop = 1;
				continue;
			} else {
				break;
			}
		}
		init_waitqueue_entry(&__wait, current);
		add_wait_queue(&tctl->tctl_waitq, &__wait);
		set_current_state(TASK_INTERRUPTIBLE);
		schedule_timeout(cfs_time_seconds(1));
		remove_wait_queue(&tctl->tctl_waitq, &__wait);
	}
	complete(&tctl->tctl_stop);
	return 0;
}

int cfs_trace_start_thread(void)
{
	struct tracefiled_ctl *tctl = &trace_tctl;
	struct task_struct *task;
	int rc = 0;

	mutex_lock(&cfs_trace_thread_mutex);
	if (thread_running)
		goto out;

	init_completion(&tctl->tctl_start);
	init_completion(&tctl->tctl_stop);
	init_waitqueue_head(&tctl->tctl_waitq);
	atomic_set(&tctl->tctl_shutdown, 0);

	task = kthread_run(tracefiled, tctl, "ktracefiled");
	if (IS_ERR(task)) {
		rc = PTR_ERR(task);
		goto out;
	}

	wait_for_completion(&tctl->tctl_start);
	thread_running = 1;
out:
	mutex_unlock(&cfs_trace_thread_mutex);
	return rc;
}

void cfs_trace_stop_thread(void)
{
	struct tracefiled_ctl *tctl = &trace_tctl;

	mutex_lock(&cfs_trace_thread_mutex);
	if (thread_running) {
		printk(KERN_INFO
		       "Lustre: shutting down debug daemon thread...\n");
		atomic_set(&tctl->tctl_shutdown, 1);
		wait_for_completion(&tctl->tctl_stop);
		thread_running = 0;
	}
	mutex_unlock(&cfs_trace_thread_mutex);
}

int cfs_tracefile_init(int max_pages)
{
	struct cfs_trace_cpu_data *tcd;
	int i;
	int j;
	int rc;
	int factor;

	rc = cfs_tracefile_init_arch();
	if (rc != 0)
		return rc;

	cfs_tcd_for_each(tcd, i, j) {
		/* tcd_pages_factor is initialized in cfs_tracefile_init_arch(). */
		factor = tcd->tcd_pages_factor;
		INIT_LIST_HEAD(&tcd->tcd_pages);
		INIT_LIST_HEAD(&tcd->tcd_stock_pages);
		INIT_LIST_HEAD(&tcd->tcd_daemon_pages);
		tcd->tcd_cur_pages = 0;
		tcd->tcd_cur_stock_pages = 0;
		tcd->tcd_cur_daemon_pages = 0;
		tcd->tcd_max_pages = (max_pages * factor) / 100;
		LASSERT(tcd->tcd_max_pages > 0);
		tcd->tcd_shutting_down = 0;
	}

	return 0;
}

static void trace_cleanup_on_all_cpus(void)
{
	struct cfs_trace_cpu_data *tcd;
	struct cfs_trace_page *tage;
	struct cfs_trace_page *tmp;
	int i, cpu;

	for_each_possible_cpu(cpu) {
		cfs_tcd_for_each_type_lock(tcd, i, cpu) {
			tcd->tcd_shutting_down = 1;

			list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages,
						 linkage) {
				__LASSERT_TAGE_INVARIANT(tage);

				list_del(&tage->linkage);
				cfs_tage_free(tage);
			}

			tcd->tcd_cur_pages = 0;
		}
	}
}

static void cfs_trace_cleanup(void)
{
	struct page_collection pc;

	INIT_LIST_HEAD(&pc.pc_pages);

	trace_cleanup_on_all_cpus();

	cfs_tracefile_fini_arch();
}

void cfs_tracefile_exit(void)
{
	cfs_trace_stop_thread();
	cfs_trace_cleanup();
}
1204