• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2022 Huawei Technologies Co., Ltd. All rights reserved.
4  */
5 
6 #define pr_fmt(fmt) "hungtask_base " fmt
7 
8 #include <linux/nmi.h>
9 #include <linux/delay.h>
10 #include <linux/freezer.h>
11 #include <linux/utsname.h>
12 #include <trace/events/sched.h>
13 #include <linux/slab.h>
14 #include <linux/version.h>
15 #include <linux/sched/debug.h>
16 #include <linux/suspend.h>
17 #include <linux/spinlock.h>
18 #ifdef CONFIG_DFX_ZEROHUNG
19 #include <dfx/zrhung.h>
20 #endif
21 #include <dfx/hungtask_base.h>
22 #include "hungtask_user.h"
23 
24 static struct rb_root list_tasks = RB_ROOT;
25 static DEFINE_SPINLOCK(list_tasks_lock);
26 static struct hlist_head whitelist[WHITELIST_LEN];
27 static struct whitelist_item whitetmplist[WHITELIST_LEN];
28 static bool whitelist_empty = true;
29 static int remove_cnt;
30 static struct task_item *remove_list[MAX_REMOVE_LIST_NUM + 1];
31 static unsigned long __read_mostly hungtask_timeout_secs =
32 	CONFIG_DEFAULT_HUNG_TASK_TIMEOUT;
33 static int did_panic;
34 static unsigned int hungtask_enable = HT_DISABLE;
35 static unsigned int whitelist_type = WHITE_LIST;
36 static int whitelist_dump_cnt = DEFAULT_WHITE_DUMP_CNT;
37 static int whitelist_panic_cnt = DEFAULT_WHITE_PANIC_CNT;
38 static int appspawn_pid;
39 static int dump_and_upload;
40 static int time_since_upload;
41 static int hung_task_must_panic;
42 static int report_zrhung_id;
43 static struct task_hung_upload upload;
44 static int do_refresh;
45 static char frozen_buf[FROZEN_BUF_LEN];
46 static int frozen_used;
47 static bool frozed_head;
48 static unsigned long cur_heartbeat;
49 static struct work_struct send_work;
50 static char report_buf_text[REPORT_MSGLENGTH];
51 
hashlist_find(struct hlist_head * head,int count,pid_t tgid)52 bool hashlist_find(struct hlist_head *head, int count, pid_t tgid)
53 {
54 	struct hashlist_node *hnode = NULL;
55 
56 	if (count <= 0)
57 		return false;
58 	if (hlist_empty(&head[tgid % count]))
59 		return false;
60 	hlist_for_each_entry(hnode, &head[tgid % count], list) {
61 		if (hnode->pid == tgid)
62 			return true;
63 	}
64 	return false;
65 }
66 
hashlist_clear(struct hlist_head * head,int count)67 void hashlist_clear(struct hlist_head *head, int count)
68 {
69 	int i = 0;
70 	struct hlist_node *n = NULL;
71 	struct hashlist_node *hnode = NULL;
72 
73 	for (i = 0; i < count; i++) {
74 		hlist_for_each_entry_safe(hnode, n, &head[i], list) {
75 			hlist_del(&hnode->list);
76 			kfree(hnode);
77 			hnode = NULL;
78 		}
79 	}
80 	for (i = 0; i < count; i++)
81 		INIT_HLIST_HEAD(&head[i]);
82 }
83 
hashlist_insert(struct hlist_head * head,int count,pid_t tgid)84 bool hashlist_insert(struct hlist_head *head, int count, pid_t tgid)
85 {
86 	struct hashlist_node *hnode = NULL;
87 
88 	if (hashlist_find(head, count, tgid))
89 		return false;
90 	hnode = kmalloc(sizeof(struct hashlist_node), GFP_ATOMIC);
91 	if (!hnode)
92 		return false;
93 	INIT_HLIST_NODE(&hnode->list);
94 	hnode->pid = tgid;
95 	hlist_add_head(&hnode->list, &head[tgid % count]);
96 	return true;
97 }
98 
/*
 * Briefly drop the RCU read lock so a long task-list walk does not stall
 * grace periods. @g and @t are pinned with extra references while the
 * lock is released, then rechecked.
 * Returns true if both tasks are still alive and the walk may continue.
 */
static bool rcu_lock_break(struct task_struct *g, struct task_struct *t)
{
	bool can_cont = false;

	get_task_struct(g);
	get_task_struct(t);
	rcu_read_unlock();
	cond_resched();
	rcu_read_lock();
	can_cont = pid_alive(g) && pid_alive(t);
	put_task_struct(t);
	put_task_struct(g);
	return can_cont;
}
113 
/*
 * Budget bookkeeping for bounded task-list walks: decrement the overall
 * budget (*max_count) and the per-batch budget (*batch_count); once a
 * batch is exhausted, release the RCU read lock briefly via
 * rcu_lock_break() and refill the batch.
 * Returns true when the caller must abort the walk (overall budget spent,
 * or @g/@t died while the lock was dropped).
 */
static bool rcu_break(int *max_count, int *batch_count,
		      struct task_struct *g,
		      struct task_struct *t)
{
	if (!(*max_count)--)
		return true;
	if (!--(*batch_count)) {
		*batch_count = HUNG_TASK_BATCHING;
		if (!rcu_lock_break(g, t))
			return true;
	}
	return false;
}
127 
/*
 * Scan all threads for one whose comm equals @name (compared over
 * TASK_COMM_LEN bytes) and return its tgid, or 0 if none is found.
 * The RCU walk is bounded by PID_MAX_LIMIT and batched through
 * rcu_break() to stay preemption-friendly.
 */
static pid_t get_pid_by_name(const char *name)
{
	int max_count = PID_MAX_LIMIT;
	int batch_count = HUNG_TASK_BATCHING;
	struct task_struct *g = NULL;
	struct task_struct *t = NULL;
	int pid = 0;

	rcu_read_lock();
	do_each_thread(g, t) {
		if (rcu_break(&max_count, &batch_count, g, t))
			goto unlock;
		if (!strncmp(t->comm, name, TASK_COMM_LEN)) {
			pid = t->tgid;
			goto unlock;
		}
	} while_each_thread(g, t);

unlock:
	rcu_read_unlock();
	return pid;
}
150 
/*
 * Classify a task from its parent's tgid (PPID) and whitelist membership.
 * @pid is currently unused; it is kept for interface stability.
 */
static unsigned int get_task_type(pid_t pid, pid_t tgid, struct task_struct *parent)
{
	unsigned int flag = TASK_TYPE_IGNORE;
	/* -1 never matches a real pid, so a missing parent adds no flag */
	pid_t ppid = parent ? parent->tgid : -1;

	if (ppid == PID_KTHREAD)
		flag |= TASK_TYPE_KERNEL;
	else if (ppid == appspawn_pid)
		flag |= TASK_TYPE_APP;
	else if (ppid == PID_INIT)
		flag |= TASK_TYPE_NATIVE;

	if (!whitelist_empty && hashlist_find(whitelist, WHITELIST_LEN, tgid))
		flag |= TASK_TYPE_WHITE | TASK_TYPE_JANK;

	return flag;
}
170 
/*
 * Re-resolve the cached tgid of the "appspawn" process by walking all
 * threads under RCU (budgeted via rcu_break()). The last match wins;
 * appspawn_pid is left unchanged when no match is found.
 */
static void refresh_appspawn_pids(void)
{
	int max_count = PID_MAX_LIMIT;
	int batch_count = HUNG_TASK_BATCHING;
	struct task_struct *g = NULL;
	struct task_struct *t = NULL;

	rcu_read_lock();
	do_each_thread(g, t) {
		if (rcu_break(&max_count, &batch_count, g, t))
			goto unlock;
		if (!strncmp(t->comm, "appspawn", TASK_COMM_LEN))
			appspawn_pid = t->tgid;
	} while_each_thread(g, t);
unlock:
	rcu_read_unlock();
}
188 
/*
 * Retag every tracked item belonging to thread group @pid with
 * @task_type. Walks the whole tree under list_tasks_lock.
 */
static void refresh_task_type(pid_t pid, int task_type)
{
	struct rb_node *node = NULL;

	spin_lock(&list_tasks_lock);
	for (node = rb_first(&list_tasks); node; node = rb_next(node)) {
		struct task_item *entry = rb_entry(node, struct task_item, node);

		if (entry->tgid == pid)
			entry->task_type = task_type;
	}
	spin_unlock(&list_tasks_lock);
}
202 
refresh_whitelist_pids(void)203 static void refresh_whitelist_pids(void)
204 {
205 	int i;
206 
207 	hashlist_clear(whitelist, WHITELIST_LEN);
208 	for (i = 0; i < WHITELIST_LEN; i++) {
209 		if (!strlen(whitetmplist[i].name))
210 			continue;
211 		whitetmplist[i].pid =
212 			get_pid_by_name(whitetmplist[i].name);
213 		if (!whitetmplist[i].pid)
214 			continue;
215 		refresh_task_type(whitetmplist[i].pid,
216 			TASK_TYPE_WHITE | TASK_TYPE_JANK);
217 		if (hashlist_insert(whitelist, WHITELIST_LEN,
218 			whitetmplist[i].pid))
219 			pr_info("whitelist[%d]-%s-%d\n", i,
220 				whitetmplist[i].name, whitetmplist[i].pid);
221 		else
222 			pr_info("can't find %s\n", whitetmplist[i].name);
223 	}
224 	refresh_appspawn_pids();
225 }
226 
/*
 * Binary-search the pid-keyed rb-tree @root for @pid.
 * Returns the matching task_item or NULL. Caller is responsible for
 * any locking of the tree.
 */
static struct task_item *find_task(pid_t pid, struct rb_root *root)
{
	struct rb_node *node = root->rb_node;

	while (node) {
		struct task_item *entry = rb_entry(node, struct task_item, node);

		if (!entry)
			return NULL;
		if (pid < entry->pid)
			node = node->rb_left;
		else if (pid > entry->pid)
			node = node->rb_right;
		else
			return entry;
	}
	return NULL;
}
247 
insert_task(struct task_item * item,struct rb_root * root)248 static bool insert_task(struct task_item *item, struct rb_root *root)
249 {
250 	struct rb_node **p = &root->rb_node;
251 	struct rb_node *parent = NULL;
252 	struct task_item *cur = NULL;
253 
254 	while (*p) {
255 		parent = *p;
256 
257 		cur = rb_entry(parent, struct task_item, node);
258 		if (!cur)
259 			return false;
260 		if (item->pid < cur->pid) {
261 			p = &(*p)->rb_left;
262 		} else if (item->pid > cur->pid) {
263 			p = &(*p)->rb_right;
264 		} else {
265 			pr_info("insert pid=%d,tgid=%d,name=%s,type=%d fail\n",
266 				item->pid, item->tgid,
267 				item->name, item->task_type);
268 			return false;
269 		}
270 	}
271 	rb_link_node(&item->node, parent, p);
272 	rb_insert_color(&item->node, root);
273 	return true;
274 }
275 
/*
 * Log one blocked task. Frozen tasks get a short FROZEN line; all others
 * are logged with priority/cpu details plus a full backtrace via
 * sched_show_task(). @taskitem may be NULL when the task is not (yet)
 * tracked in list_tasks. la/lq are the sched_info last_arrival /
 * last_queued timestamps (reported as 0 without CONFIG_SCHED_INFO).
 */
void show_block_task(struct task_item *taskitem, struct task_struct *p)
{
	unsigned long last_arrival;
	unsigned long last_queued;

#ifdef CONFIG_SCHED_INFO
	last_arrival = p->sched_info.last_arrival;
	last_queued = p->sched_info.last_queued;
#else
	last_arrival = 0;
	last_queued = 0;
#endif /* CONFIG_SCHED_INFO */
	if (unlikely(p->flags & PF_FROZEN)) {
		/* d_state_time is in heartbeats; convert to seconds for the log */
		if (taskitem)
			pr_err("name=%s,PID=%d,tgid=%d,tgname=%s,"
			       "FROZEN for %ds,type=%d,la%lu/lq%lu\n",
			       p->comm, p->pid, p->tgid,
			       p->group_leader->comm,
			       taskitem->d_state_time * HEARTBEAT_TIME,
			       taskitem->task_type,
			       last_arrival, last_queued);
		else
			pr_err("name=%s,PID=%d,tgid=%d,tgname=%s,"
			       "just FROZE,la%lu/lq%lu\n",
			       p->comm, p->pid, p->tgid,
			       p->group_leader->comm,
			       last_arrival, last_queued);
	} else {
		if (taskitem)
			pr_err("name=%s,PID=%d,tgid=%d,prio=%d,cpu=%d,tgname=%s,"
			       "type=%d,blocked for %ds,la%lu/lq%lu\n",
			       taskitem->name, taskitem->pid, p->tgid, p->prio,
			       task_cpu(p), p->group_leader->comm, taskitem->task_type,
			       taskitem->d_state_time * HEARTBEAT_TIME,
			       last_arrival, last_queued);
		else
			pr_err("name=%s,PID=%d,tgid=%d,prio=%d,cpu=%d,"
			       "tgname=%s,la%lu/lq%lu\n",
			       p->comm, p->pid, p->tgid, p->prio, task_cpu(p),
			       p->group_leader->comm,
			       last_arrival, last_queued);

		/* backtrace only for non-frozen tasks */
		sched_show_task(p);
	}
}
321 
/*
 * Print every running task plus every task whose state matches
 * @state_filter, annotated with tracking data from list_tasks when
 * available. With a filter of TASK_UNINTERRUPTIBLE (or 0) also dumps
 * the lock state of the system.
 */
void htbase_show_state_filter(unsigned long state_filter)
{
	struct task_struct *g = NULL;
	struct task_struct *p = NULL;
	struct task_item *taskitem = NULL;

#if BITS_PER_LONG == 32
	pr_info("  task                PC stack   pid father\n");
#else
	pr_info("  task                        PC stack   pid father\n");
#endif
	rcu_read_lock();
	for_each_process_thread(g, p) {
		/*
		 * reset the NMI-timeout, listing all files on a slow
		 * console might take a lot of time:
		 */
		touch_nmi_watchdog();
		if ((p->state == TASK_RUNNING) || (p->state & state_filter)) {
			/* lookup under the lock; printing happens outside it */
			spin_lock(&list_tasks_lock);
			taskitem = find_task(p->pid, &list_tasks);
			spin_unlock(&list_tasks_lock);
			show_block_task(taskitem, p);
		}
	}
	touch_all_softlockup_watchdogs();
	rcu_read_unlock();
	/* Show locks if hungtask happen */
	if ((state_filter == TASK_UNINTERRUPTIBLE) || !state_filter)
		debug_show_all_locks();
}
353 
/* SysRq/binder entry point: dump the filtered task state framed by
 * BinderChain markers so log scrapers can delimit the section. */
void hungtask_show_state_filter(unsigned long state_filter)
{
	pr_err("BinderChain_SysRq start\n");
	htbase_show_state_filter(state_filter);
	pr_err("BinderChain_SysRq end\n");
}
360 
/* Dump one task's backtrace and any locks it currently holds. */
void do_dump_task(struct task_struct *task)
{
	sched_show_task(task);
	debug_show_held_locks(task);
}
366 
do_show_task(struct task_struct * task,unsigned int flag,int d_state_time)367 void do_show_task(struct task_struct *task, unsigned int flag, int d_state_time)
368 {
369 	pr_err("%s, flag=%d\n", __func__, flag);
370 	rcu_read_lock();
371 	if (!pid_alive(task)) {
372 		rcu_read_unlock();
373 		return;
374 	}
375 	if (flag & (FLAG_DUMP_WHITE | FLAG_DUMP_APP)) {
376 		int cnt = 0;
377 
378 		trace_sched_process_hang(task);
379 		cnt = d_state_time;
380 		pr_err("INFO: task %s:%d tgid:%d blocked for %ds in %s\n",
381 		       task->comm, task->pid, task->tgid,
382 		       (HEARTBEAT_TIME * cnt),
383 		       (flag & FLAG_DUMP_WHITE) ? "whitelist" : "applist");
384 		pr_err("      %s %s %.*s\n",
385 		       print_tainted(), init_utsname()->release,
386 		       (int)strcspn(init_utsname()->version, " "),
387 		       init_utsname()->version);
388 		do_dump_task(task);
389 		touch_nmi_watchdog();
390 		if (flag & FLAG_DUMP_WHITE && (!dump_and_upload)) {
391 			dump_and_upload++;
392 			upload.pid = task->pid;
393 			upload.tgid = task->tgid;
394 			upload.duration = d_state_time;
395 			memset(upload.name, 0, sizeof(upload.name));
396 			strncpy(upload.name, task->comm, sizeof(upload.name));
397 			upload.flag = flag;
398 			if (task->flags & PF_FROZEN)
399 				upload.flag = (upload.flag | FLAG_PF_FROZEN);
400 		}
401 	}
402 	rcu_read_unlock();
403 }
404 
do_panic(void)405 static void do_panic(void)
406 {
407 	if (sysctl_hung_task_panic) {
408 		trigger_all_cpu_backtrace();
409 		panic("hungtask: blocked tasks");
410 	}
411 }
412 
create_taskitem(struct task_item * taskitem,struct task_struct * task)413 static void create_taskitem(struct task_item *taskitem,
414 			    struct task_struct *task)
415 {
416 	taskitem->pid = task->pid;
417 	taskitem->tgid = task->tgid;
418 	memset(taskitem->name, 0, sizeof(taskitem->name));
419 	strncpy(taskitem->name, task->comm, sizeof(taskitem->name));
420 	taskitem->switch_count = task->nvcsw + task->nivcsw;
421 	taskitem->dump_wa = 0; /* whitelist or applist task dump times */
422 	taskitem->panic_wa = 0; /* whitelist or applist task panic times */
423 	taskitem->d_state_time = -1;
424 	taskitem->isdone_wa = true; /* if task in white or app dealed */
425 }
426 
/*
 * Update the per-cycle state of @taskitem against the live @task.
 * Returns true ("is_called") when the task has been scheduled since the
 * last check (its context-switch count changed); the caller then resets
 * the hung counters. Otherwise the task is still blocked: bump the dump
 * and panic counters for whitelist tasks, clear isdone_wa so the item
 * survives shrink_list_tasks(), advance the D-state timer, and latch
 * TASK_TYPE_FROZEN when the task is frozen.
 */
static bool refresh_task(struct task_item *taskitem, struct task_struct *task)
{
	bool is_called = false;

	if (taskitem->switch_count != (task->nvcsw + task->nivcsw)) {
		taskitem->switch_count = task->nvcsw + task->nivcsw;
		is_called = true;
		return is_called;
	}
	if (taskitem->task_type & TASK_TYPE_WHITE) {
		taskitem->isdone_wa = false;
		taskitem->dump_wa++;
		taskitem->panic_wa++;
	}
	taskitem->d_state_time++;
	if (task->flags & PF_FROZEN)
		taskitem->task_type |= TASK_TYPE_FROZEN;
	return is_called;
}
446 
/*
 * Unlink @item from list_tasks and free it.
 * NOTE(review): some callers (shrink_list_tasks) hold list_tasks_lock,
 * others (deal_task) do not — presumably safe because all mutation runs
 * in the single hungtask checker context; confirm before reuse.
 */
static void remove_list_tasks(struct task_item *item)
{
	rb_erase(&item->node, &list_tasks);
	kfree(item);
}
452 
/*
 * Queue @item for batched removal in remove_list. When the batch buffer
 * is full, @item and the whole pending batch are removed immediately and
 * *is_finish is cleared so shrink_list_tasks() restarts its tree walk —
 * the rb_erase calls have invalidated the caller's iteration point.
 */
static void shrink_process_item(struct task_item *item, bool *is_finish)
{
	if (remove_cnt >= MAX_REMOVE_LIST_NUM) {
		int i;

		/* remove_list has MAX_REMOVE_LIST_NUM + 1 slots, so this
		 * final append is still in bounds */
		remove_list[remove_cnt++] = item;
		for (i = 0; i < remove_cnt; i++)
			remove_list_tasks(remove_list[i]);
		remove_cnt = 0;
		*is_finish = false;
	} else {
		remove_list[remove_cnt++] = item;
	}
}
467 
/*
 * Prune every task_item whose isdone_wa flag is still set (tasks not
 * observed hung this cycle). Removals are batched through remove_list;
 * whenever a batch flush invalidates the rb-tree iterator the scan is
 * restarted, until a full pass completes without flushing. Any leftover
 * batched items are removed at the end.
 */
static void shrink_list_tasks(void)
{
	int i;
	bool is_finish = false;
	struct rb_node *n = NULL;
	struct task_item *item = NULL;

	spin_lock(&list_tasks_lock);
	while (!is_finish) {
		is_finish = true;
		for (n = rb_first(&list_tasks); n != NULL; n = rb_next(n)) {
			item = rb_entry(n, struct task_item, node);
			if (!item)
				continue;
			if (item->isdone_wa) {
				shrink_process_item(item, &is_finish);
				if (!is_finish)
					break;
			}
		}
	}
	for (i = 0; i < remove_cnt; i++)
		remove_list_tasks(remove_list[i]);
	remove_cnt = 0;
	spin_unlock(&list_tasks_lock);
}
494 
check_parameters(void)495 static void check_parameters(void)
496 {
497 	if ((whitelist_dump_cnt < 0) ||
498 		(whitelist_dump_cnt > DEFAULT_WHITE_DUMP_CNT))
499 		whitelist_dump_cnt = DEFAULT_WHITE_DUMP_CNT;
500 	if ((whitelist_panic_cnt <= 0) ||
501 		(whitelist_panic_cnt > DEFAULT_WHITE_PANIC_CNT))
502 		whitelist_panic_cnt = DEFAULT_WHITE_PANIC_CNT;
503 }
504 
/* Workqueue callback: forward the prepared report_buf_text to zerohung
 * (no-op when CONFIG_DFX_ZEROHUNG is disabled). */
static void send_work_handler(struct work_struct *data)
{
#ifdef CONFIG_DFX_ZEROHUNG
	zrhung_send_event(HUNGTASK_DOMAIN, HUNGTASK_NAME,
		report_buf_text);
#endif
}
512 
/*
 * Emit the tail of a zerohung report: dump all D-state tasks, close the
 * tagged log section, schedule the async send of report_buf_text, and
 * advance the per-report id used in the next tag.
 */
static void htbase_report_zrhung_event(const char *report_buf_tag)
{
	htbase_show_state_filter(TASK_UNINTERRUPTIBLE);
	pr_err("%s end\n", report_buf_tag);
	schedule_work(&send_work);
	report_zrhung_id++;
}
520 
htbase_report_zrhung(unsigned int event)521 static void htbase_report_zrhung(unsigned int event)
522 {
523 	bool report_load = false;
524 	char report_buf_tag[REPORT_MSGLENGTH] = {0};
525 	char report_name[TASK_COMM_LEN + 1] = {0};
526 	int report_pid = 0;
527 	int report_hungtime = 0;
528 	int report_tasktype = 0;
529 
530 	if (!event)
531 		return;
532 	if (event & HUNGTASK_EVENT_WHITELIST) {
533 		snprintf(report_buf_tag, sizeof(report_buf_tag),
534 			 "hungtask_whitelist_%d", report_zrhung_id);
535 		strncpy(report_name, upload.name, TASK_COMM_LEN);
536 		report_pid = upload.pid;
537 		report_tasktype = TASK_TYPE_WHITE;
538 		report_hungtime = whitelist_dump_cnt * HEARTBEAT_TIME;
539 		report_load = true;
540 	} else {
541 		pr_err("No such event report to zerohung!");
542 	}
543 	pr_err("%s start\n", report_buf_tag);
544 	if (event & HUNGTASK_EVENT_WHITELIST)
545 		pr_err("report HUNGTASK_EVENT_WHITELIST to zrhung\n");
546 	if (upload.flag & FLAG_PF_FROZEN)
547 		snprintf(report_buf_text, sizeof(report_buf_text),
548 			 "Task %s(%s) pid %d type %d blocked %ds.",
549 			 report_name, "FROZEN", report_pid, report_tasktype, report_hungtime);
550 	else
551 		snprintf(report_buf_text, sizeof(report_buf_text),
552 			 "Task %s pid %d type %d blocked %ds.",
553 			 report_name, report_pid, report_tasktype, report_hungtime);
554 	if (report_load)
555 		htbase_report_zrhung_event(report_buf_tag);
556 }
557 
/*
 * Append "pid," to the shared frozen_buf, writing the "FROZEN Pid:"
 * header first when the buffer is fresh. frozen_used is clamped so it
 * never passes the buffer end.
 * Returns the buffer length used so far, or -1 on snprintf failure;
 * the caller flushes and resets the buffer once the return value
 * reaches FROZEN_BUF_LEN - 1.
 */
static int print_frozen_list_item(int pid)
{
	int tmp;

	if (!frozed_head) {
		tmp = snprintf(frozen_buf, FROZEN_BUF_LEN, "%s", "FROZEN Pid:");
		if (tmp < 0)
			return -1;
		/* snprintf may report more than it wrote; clamp to capacity */
		frozen_used += min(tmp, FROZEN_BUF_LEN - 1);
		frozed_head = true;
	}
	tmp = snprintf(frozen_buf + frozen_used, FROZEN_BUF_LEN - frozen_used, "%d,",
		pid);
	if (tmp < 0)
		return -1;
	frozen_used += min(tmp, FROZEN_BUF_LEN - frozen_used - 1);
	return frozen_used;
}
576 
/*
 * Rate-limited dump of a whitelist/applist task stuck in D state.
 * Once d_state_time exceeds two minutes (then ten minutes, then one
 * hour) dumps are only permitted on exact multiples of the respective
 * interval. Within an allowed slot, a dump fires when dump_wa exceeds
 * @dump_cnt: frozen tasks are only collected into frozen_buf (flushed
 * when full) while non-frozen ones get a full do_show_task() dump.
 * Returns the number of tasks actually dumped (0 or 1).
 */
int dump_task_wa(struct task_item *item, int dump_cnt,
	struct task_struct *task, unsigned int flag)
{
	int ret = 0;

	if ((item->d_state_time > TWO_MINUTES) &&
		(item->d_state_time % TWO_MINUTES != 0))
		return ret;
	if ((item->d_state_time > HUNG_TEN_MINUTES) &&
		(item->d_state_time % HUNG_TEN_MINUTES != 0))
		return ret;
	if ((item->d_state_time > HUNG_ONE_HOUR) &&
		(item->d_state_time % HUNG_ONE_HOUR != 0))
		return ret;
	if (dump_cnt && (item->dump_wa > dump_cnt)) {
		item->dump_wa = 1;
		if (!dump_and_upload && task->flags & PF_FROZEN) {
			int tmp = print_frozen_list_item(item->pid);
			if (tmp < 0)
				return ret;
			if (tmp >= FROZEN_BUF_LEN - 1) {
				/* buffer full: flush, reset, and re-add this pid */
				pr_err("%s", frozen_buf);
				memset(frozen_buf, 0, sizeof(frozen_buf));
				frozen_used = 0;
				frozed_head = false;
				print_frozen_list_item(item->pid);
			}
		} else if (!dump_and_upload) {
			pr_err("Ready to dump a task %s\n", item->name);
			do_show_task(task, flag, item->d_state_time);
			ret++;
		}
	}
	return ret;
}
612 
update_panic_task(struct task_item * item)613 static void update_panic_task(struct task_item *item)
614 {
615 	if (upload.pid != 0)
616 		return;
617 
618 	upload.pid = item->pid;
619 	upload.tgid = item->tgid;
620 	memset(upload.name, 0, sizeof(upload.name));
621 	strncpy(upload.name, item->name, sizeof(upload.name));
622 }
623 
/*
 * Apply policy to one tracked task after its per-cycle refresh.
 * - If the task ran since the last cycle (@is_called), reset its
 *   counters and keep it.
 * - Whitelist tasks are dumped via dump_task_wa() and, once panic_wa
 *   crosses whitelist_panic_cnt, recorded through update_panic_task()
 *   with hung_task_must_panic raised for htbase_post_process().
 * - Items left with isdone_wa set are dropped from the tree.
 */
static void deal_task(struct task_item *item, struct task_struct *task, bool is_called)
{
	int any_dumped_num = 0;

	if (is_called) {
		item->dump_wa = 1;
		item->panic_wa = 1;
		item->d_state_time = 0;
		return;
	}
	if (item->task_type & TASK_TYPE_WHITE)
		any_dumped_num = dump_task_wa(item, whitelist_dump_cnt, task,
					      FLAG_DUMP_WHITE);
	/* NOTE(review): !is_called is always true here — the is_called
	 * case returned above */
	if (!is_called && (item->task_type & TASK_TYPE_WHITE)) {
		if (whitelist_panic_cnt && item->panic_wa > whitelist_panic_cnt) {
			pr_err("Task %s is causing panic\n", item->name);
			update_panic_task(item);
			item->panic_wa = 0;
			hung_task_must_panic++;
		} else {
			item->isdone_wa = false;
		}
	}
	if (item->isdone_wa)
		remove_list_tasks(item);
}
650 
check_conditions(struct task_struct * task,unsigned int task_type)651 static bool check_conditions(struct task_struct *task, unsigned int task_type)
652 {
653 	bool no_check = true;
654 
655 	if (task->flags & PF_FROZEN)
656 		return no_check;
657 	if (task_type & TASK_TYPE_WHITE &&
658 		(whitelist_dump_cnt || whitelist_panic_cnt))
659 		no_check = false;
660 	return no_check;
661 }
662 
/*
 * Track one uninterruptible/killable task: look up or create its
 * task_item in list_tasks, refresh its counters, then apply the
 * dump/panic policy via deal_task().
 * NOTE(review): find_task()/insert_task() run here without
 * list_tasks_lock, unlike htbase_show_state_filter() — presumably safe
 * because all tree writers run in the single hungtask checker context;
 * confirm before adding new callers.
 */
static void htbase_check_one_task(struct task_struct *t)
{
	unsigned int task_type = TASK_TYPE_IGNORE;
	unsigned long switch_count = t->nvcsw + t->nivcsw;
	struct task_item *taskitem = NULL;
	bool is_called = false;

	/* a task that never scheduled yet cannot be classified as hung */
	if (unlikely(!switch_count)) {
		pr_info("skip one's switch_count is zero\n");
		return;
	}

	taskitem = find_task(t->pid, &list_tasks);
	if (taskitem) {
		if (check_conditions(t, taskitem->task_type))
			return;
		is_called = refresh_task(taskitem, t);
	} else {
		task_type = get_task_type(t->pid, t->tgid, t->real_parent);
		if (check_conditions(t, task_type))
			return;
		taskitem = kmalloc(sizeof(*taskitem), GFP_ATOMIC);
		if (!taskitem) {
			pr_err("kmalloc failed");
			return;
		}
		memset(taskitem, 0, sizeof(*taskitem));
		taskitem->task_type = task_type;
		create_taskitem(taskitem, t);
		is_called = refresh_task(taskitem, t);
		insert_task(taskitem, &list_tasks);
	}
	deal_task(taskitem, t, is_called);
}
697 
htbase_pre_process(void)698 static void htbase_pre_process(void)
699 {
700 	htbase_set_timeout_secs(sysctl_hung_task_timeout_secs);
701 	cur_heartbeat++;
702 	if ((cur_heartbeat % REFRESH_INTERVAL) == 0)
703 		do_refresh = 1;
704 	else
705 		do_refresh = 0;
706 	if (do_refresh || (cur_heartbeat < TIME_REFRESH_PIDS)) {
707 		refresh_whitelist_pids();
708 		check_parameters();
709 	}
710 }
711 
/*
 * Finish one check cycle:
 * - flush any pending FROZEN pid list,
 * - promote a first-time whitelist dump to a zerohung event,
 * - age out the dump_and_upload latch after
 *   (whitelist_panic_cnt - whitelist_dump_cnt) heartbeats,
 * - dump state and panic (if configured) when a whitelist task crossed
 *   the panic threshold,
 * - run the userlist logic, prune finished items, re-arm isdone_wa for
 *   the next cycle, and finally send any collected event to zerohung.
 */
static void htbase_post_process(void)
{
	struct rb_node *n = NULL;
	unsigned int hungevent = 0;

	if (frozen_used) {
		pr_err("%s", frozen_buf);
		memset(frozen_buf, 0, sizeof(frozen_buf));
		frozen_used = 0;
		frozed_head = false;
	}
	if (dump_and_upload == HUNG_TASK_UPLOAD_ONCE) {
		hungevent |= HUNGTASK_EVENT_WHITELIST;
		dump_and_upload++;
	}
	if (dump_and_upload > 0) {
		time_since_upload++;
		if (time_since_upload > (whitelist_panic_cnt - whitelist_dump_cnt)) {
			dump_and_upload = 0;
			time_since_upload = 0;
		}
	}
	if (hung_task_must_panic) {
		htbase_show_state_filter(TASK_UNINTERRUPTIBLE);
		hung_task_must_panic = 0;
		pr_err("Task %s:%d blocked for %ds is causing panic\n",
		       upload.name, upload.pid,
		       whitelist_panic_cnt * HEARTBEAT_TIME);
		do_panic();
	}
	htuser_post_process_userlist();
	shrink_list_tasks();
	/* mark every surviving item done; refresh_task() clears the flag
	 * next cycle for tasks still blocked */
	for (n = rb_first(&list_tasks); n != NULL; n = rb_next(n)) {
		struct task_item *item = rb_entry(n, struct task_item, node);
		item->isdone_wa = true;
	}

	if (hungevent)
		htbase_report_zrhung(hungevent);
}
752 
/*
 * Main periodic entry, called every heartbeat with the current @timeout.
 * Skips work while disabled or after a panic/die. Otherwise refreshes
 * whitelist state, walks all threads under RCU (budgeted by
 * PID_MAX_LIMIT and batched by HUNG_TASK_BATCHING) checking every
 * uninterruptible or killable thread, then runs post-processing which
 * may dump state, report to zerohung, or panic.
 */
void htbase_check_tasks(unsigned long timeout)
{
	int max_count = PID_MAX_LIMIT;
	int batch_count = HUNG_TASK_BATCHING;
	struct task_struct *g = NULL;
	struct task_struct *t = NULL;

	if (!hungtask_enable)
		return;
	if (test_taint(TAINT_DIE) || did_panic) {
		pr_err("already in doing panic\n");
		return;
	}

	htbase_pre_process();
	rcu_read_lock();
	for_each_process_thread(g, t) {
		if (!max_count--)
			goto unlock;
		if (!--batch_count) {
			batch_count = HUNG_TASK_BATCHING;
			/* drop the RCU lock periodically; abort if g/t died */
			if (!rcu_lock_break(g, t))
				goto unlock;
		}
		if ((t->state == TASK_UNINTERRUPTIBLE) ||
		    (t->state == TASK_KILLABLE))
			htbase_check_one_task(t);
	}
unlock:
	rcu_read_unlock();
	htbase_post_process();
}
785 
htbase_enable_show(struct kobject * kobj,struct kobj_attribute * attr,char * buf)786 static ssize_t htbase_enable_show(struct kobject *kobj,
787 				  struct kobj_attribute *attr,
788 				  char *buf)
789 {
790 	if (hungtask_enable)
791 		return snprintf(buf, ENABLE_SHOW_LEN, "on\n");
792 	else
793 		return snprintf(buf, ENABLE_SHOW_LEN, "off\n");
794 }
795 
htbase_enable_store(struct kobject * kobj,struct kobj_attribute * attr,const char * buf,size_t count)796 static ssize_t htbase_enable_store(struct kobject *kobj,
797 				   struct kobj_attribute *attr,
798 				   const char *buf, size_t count)
799 {
800 	char tmp[6]; /* only storage "on" "off" "kick" and enter */
801 	size_t len;
802 	char *p = NULL;
803 
804 	if (!buf)
805 		return -EINVAL;
806 	if ((count < 2) || (count > (sizeof(tmp) - 1))) {
807 		pr_err("string too long or too short\n");
808 		return -EINVAL;
809 	}
810 
811 	p = memchr(buf, '\n', count);
812 	len = p ? (size_t)(p - buf) : count;
813 	memset(tmp, 0, sizeof(tmp));
814 	strncpy(tmp, buf, len);
815 	if (!strncmp(tmp, "on", strlen(tmp))) {
816 		hungtask_enable = HT_ENABLE;
817 		pr_info("set hungtask_enable to enable\n");
818 	} else if (!strncmp(tmp, "off", strlen(tmp))) {
819 		hungtask_enable = HT_DISABLE;
820 		pr_info("set hungtask_enable to disable\n");
821 	} else {
822 		pr_err("only accept on or off\n");
823 	}
824 	return (ssize_t) count;
825 }
826 
/*
 * sysfs 'monitorlist' show: render "whitelist:[name-pid,...]" from the
 * resolved entries of whitetmplist. all_buf is 20 bytes smaller than
 * the store limit to leave room for the list-type header.
 */
static ssize_t htbase_monitorlist_show(struct kobject *kobj,
				       struct kobj_attribute *attr,
				       char *buf)
{
	int i;
	char *start = buf;
	char all_buf[WHITELIST_STORE_LEN - 20];	/* exclude extra header len 20*/
	unsigned long len = 0;

	memset(all_buf, 0, sizeof(all_buf));
	for (i = 0; i < WHITELIST_LEN; i++) {
		if (whitetmplist[i].pid > 0) {
			len += snprintf(all_buf + len, sizeof(all_buf) - len,
					"%s-%d,", whitetmplist[i].name, whitetmplist[i].pid);
			/* snprintf may report more than fits; clamp and stop */
			if (!(len < sizeof(all_buf))) {
				len = sizeof(all_buf) - 1;
				break;
			}
		}
	}
	if (len > 0)
		all_buf[len] = 0;
	if (whitelist_type == WHITE_LIST)
		buf += snprintf(buf, WHITELIST_STORE_LEN, "whitelist:[%s]\n", all_buf);
	else if (whitelist_type == BLACK_LIST)
		buf += snprintf(buf, WHITELIST_STORE_LEN, "blacklist:[%s]\n", all_buf);
	else
		buf += snprintf(buf, WHITELIST_STORE_LEN, "\n");
	return buf - start;
}
857 
htbase_monitorlist_update(char ** cur)858 static void htbase_monitorlist_update(char **cur)
859 {
860 	int index = 0;
861 	char *token = NULL;
862 
863 	hashlist_clear(whitelist, WHITELIST_LEN);
864 	memset(whitetmplist, 0, sizeof(whitetmplist));
865 	/* generate the new whitelist */
866 	for (; ; ) {
867 		token = strsep(cur, ",");
868 		if (token && strlen(token)) {
869 			strncpy(whitetmplist[index].name, token, TASK_COMM_LEN);
870 			if (strlen(whitetmplist[index].name) > 0)
871 				whitelist_empty = false;
872 			index++;
873 			if (index >= WHITELIST_LEN)
874 				break;
875 		}
876 		if (!(*cur))
877 			break;
878 	}
879 }
880 
881 /*
882  * monitorlist_store    -  Called when 'write/echo' method is
883  * used on entry '/sys/kernel/hungtask/monitorlist'.
884  */
htbase_monitorlist_store(struct kobject * kobj,struct kobj_attribute * attr,const char * buf,size_t n)885 static ssize_t htbase_monitorlist_store(struct kobject *kobj,
886 					struct kobj_attribute *attr,
887 					const char *buf, size_t n)
888 {
889 	size_t len;
890 	char *p = NULL;
891 	char all_buf[WHITELIST_STORE_LEN];
892 	char *cur = all_buf;
893 
894 
895 	if ((n < 2) || (n > (sizeof(all_buf) - 1))) {
896 		pr_err("whitelist input string illegal\n");
897 		return -EINVAL;
898 	}
899 	if (!buf)
900 		return -EINVAL;
901 	/*
902 	 * input format:
903 	 * write /sys/kernel/hungtask/monitorlist "whitelist,
904 	 * system_server,surfaceflinger"
905 	 */
906 	p = memchr(buf, '\n', n);
907 	len = p ? (size_t)(p - buf) : n; /* exclude the '\n' */
908 
909 	memset(all_buf, 0, sizeof(all_buf));
910 	len =  len > WHITELIST_STORE_LEN ? WHITELIST_STORE_LEN : len;
911 	strncpy(all_buf, buf, len);
912 	p = strsep(&cur, ",");
913 	if (!cur) {
914 		pr_err("string is not correct\n");
915 		return -EINVAL;
916 	}
917 	if (!strncmp(p, "whitelist", n)) {
918 		whitelist_type = WHITE_LIST;
919 	} else {
920 		if (!strncmp(p, "blacklist", n))
921 			pr_err("blacklist is not support\n");
922 		else
923 			pr_err("wrong list type is set\n");
924 		return -EINVAL;
925 	}
926 	if (!strlen(cur)) {
927 		pr_err("at least one process need to be set\n");
928 		return -EINVAL;
929 	}
930 	pr_err("whitelist is %s\n", cur);
931 
932 	htbase_monitorlist_update(&cur);
933 	/* check again in case user input "whitelist,,,,,," */
934 	if (whitelist_empty) {
935 		pr_err("at least one process need to be set\n");
936 		return -EINVAL;
937 	}
938 	return (ssize_t) n;
939 }
940 
941 /* used for sysctl at "/proc/sys/kernel/hung_task_timeout_secs" */
htbase_set_timeout_secs(unsigned long new_hungtask_timeout_secs)942 void htbase_set_timeout_secs(unsigned long new_hungtask_timeout_secs)
943 {
944 	if ((new_hungtask_timeout_secs > CONFIG_DEFAULT_HUNG_TASK_TIMEOUT) ||
945 		(new_hungtask_timeout_secs % HEARTBEAT_TIME))
946 		return;
947 	hungtask_timeout_secs = new_hungtask_timeout_secs;
948 	/*
949 	 * if user change panic timeout value, we sync it to dump value
950 	 * defaultly, user can set it diffrently
951 	 */
952 	whitelist_panic_cnt = (int)(hungtask_timeout_secs / HEARTBEAT_TIME);
953 	if (whitelist_panic_cnt > THIRTY_SECONDS)
954 		whitelist_dump_cnt = whitelist_panic_cnt / HT_DUMP_IN_PANIC_LOOSE;
955 	else
956 		whitelist_dump_cnt = whitelist_panic_cnt / HT_DUMP_IN_PANIC_STRICT;
957 }
958 
/* Record that a panic occurred elsewhere; htbase_check_tasks() then
 * stops doing any further checking. */
void htbase_set_panic(int new_did_panic)
{
	did_panic = new_did_panic;
}
963 
/* /sys/kernel/hungtask/enable — on/off switch (note: the variable is
 * historically named timeout_attribute, but the attr is "enable") */
static struct kobj_attribute timeout_attribute = {
	.attr = {
		 .name = "enable",
		 .mode = 0640,
	},
	.show = htbase_enable_show,
	.store = htbase_enable_store,
};

/* /sys/kernel/hungtask/monitorlist — whitelist configuration */
static struct kobj_attribute monitorlist_attr = {
	.attr = {
		 .name = "monitorlist",
		 .mode = 0640,
	},
	.show = htbase_monitorlist_show,
	.store = htbase_monitorlist_store,
};

#ifdef CONFIG_DFX_HUNGTASK_USER
/* /sys/kernel/hungtask/userlist — userspace watchdog interface */
static struct kobj_attribute userlist_attr = {
	.attr = {
		 .name = "userlist",
		 .mode = 0640,
	},
	.show = htuser_list_show,
	.store = htuser_list_store,
};
#endif

/* all attributes exposed under /sys/kernel/hungtask */
static struct attribute *attrs[] = {
	&timeout_attribute.attr,
	&monitorlist_attr.attr,
#ifdef CONFIG_DFX_HUNGTASK_USER
	&userlist_attr.attr,
#endif
	NULL
};

static struct attribute_group hungtask_attr_group = {
	.attrs = attrs,
};
1005 
static struct kobject *hungtask_kobj;
/*
 * Create /sys/kernel/hungtask with its attribute group, then initialize
 * the whitelist hash buckets and the zerohung report work item.
 * Polls once a second until kernel_kobj exists. Returns 0 on success or
 * a sysfs error code; note that the list/work initialization proceeds
 * even when the sysfs group could not be created, so the checker can
 * still run without its sysfs interface.
 */
int htbase_create_sysfs(void)
{
	int i;
	int ret;

	/* sleep 1000ms and wait /sys/kernel ready */
	while (!kernel_kobj)
		msleep(1000);

	/* Create kobject named "hungtask" located at /sys/kernel/huangtask */
	hungtask_kobj = kobject_create_and_add("hungtask", kernel_kobj);
	if (!hungtask_kobj)
		return -ENOMEM;
	ret = sysfs_create_group(hungtask_kobj, &hungtask_attr_group);
	if (ret)
		kobject_put(hungtask_kobj);

	for (i = 0; i < WHITELIST_LEN; i++)
		INIT_HLIST_HEAD(&whitelist[i]);
	memset(whitetmplist, 0, sizeof(whitetmplist));

	INIT_WORK(&send_work, send_work_handler);

	return ret;
}
1032