1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright (C) 2022 Huawei Technologies Co., Ltd. All rights reserved.
4 */
5
6 #define pr_fmt(fmt) "hungtask_base " fmt
7
8 #include <linux/nmi.h>
9 #include <linux/delay.h>
10 #include <linux/freezer.h>
11 #include <linux/utsname.h>
12 #include <trace/events/sched.h>
13 #include <linux/slab.h>
14 #include <linux/version.h>
15 #include <linux/sched/debug.h>
16 #include <linux/suspend.h>
17 #include <linux/spinlock.h>
18 #ifdef CONFIG_DFX_ZEROHUNG
19 #include <dfx/zrhung.h>
20 #endif
21 #include <dfx/hungtask_base.h>
22 #include "hungtask_user.h"
23
/* rbtree of tracked D-state tasks, keyed by pid */
static struct rb_root list_tasks = RB_ROOT;
static DEFINE_SPINLOCK(list_tasks_lock);
/* hash of whitelisted tgids plus the name/pid table it is built from */
static struct hlist_head whitelist[WHITELIST_LEN];
static struct whitelist_item whitetmplist[WHITELIST_LEN];
static bool whitelist_empty = true;
/* batch buffer used when pruning finished items from list_tasks */
static int remove_cnt;
static struct task_item *remove_list[MAX_REMOVE_LIST_NUM + 1];
/* timeout mirrored from sysctl kernel.hung_task_timeout_secs */
static unsigned long __read_mostly hungtask_timeout_secs =
	CONFIG_DEFAULT_HUNG_TASK_TIMEOUT;
static int did_panic;
static unsigned int hungtask_enable = HT_DISABLE;
static unsigned int whitelist_type = WHITE_LIST;
/* dump/panic thresholds for whitelist tasks, in heartbeats */
static int whitelist_dump_cnt = DEFAULT_WHITE_DUMP_CNT;
static int whitelist_panic_cnt = DEFAULT_WHITE_PANIC_CNT;
/* cached tgid of the "appspawn" process (app incubator) */
static int appspawn_pid;
/* nonzero once a whitelist task has been dumped this report cycle */
static int dump_and_upload;
static int time_since_upload;
static int hung_task_must_panic;
static int report_zrhung_id;
/* task recorded for the next zerohung report / panic message */
static struct task_hung_upload upload;
static int do_refresh;
/* accumulates a one-line "FROZEN Pid:..." summary between heartbeats */
static char frozen_buf[FROZEN_BUF_LEN];
static int frozen_used;
static bool frozed_head;
static unsigned long cur_heartbeat;
/* deferred zerohung report delivery */
static struct work_struct send_work;
static char report_buf_text[REPORT_MSGLENGTH];
51
hashlist_find(struct hlist_head * head,int count,pid_t tgid)52 bool hashlist_find(struct hlist_head *head, int count, pid_t tgid)
53 {
54 struct hashlist_node *hnode = NULL;
55
56 if (count <= 0)
57 return false;
58 if (hlist_empty(&head[tgid % count]))
59 return false;
60 hlist_for_each_entry(hnode, &head[tgid % count], list) {
61 if (hnode->pid == tgid)
62 return true;
63 }
64 return false;
65 }
66
hashlist_clear(struct hlist_head * head,int count)67 void hashlist_clear(struct hlist_head *head, int count)
68 {
69 int i = 0;
70 struct hlist_node *n = NULL;
71 struct hashlist_node *hnode = NULL;
72
73 for (i = 0; i < count; i++) {
74 hlist_for_each_entry_safe(hnode, n, &head[i], list) {
75 hlist_del(&hnode->list);
76 kfree(hnode);
77 hnode = NULL;
78 }
79 }
80 for (i = 0; i < count; i++)
81 INIT_HLIST_HEAD(&head[i]);
82 }
83
hashlist_insert(struct hlist_head * head,int count,pid_t tgid)84 bool hashlist_insert(struct hlist_head *head, int count, pid_t tgid)
85 {
86 struct hashlist_node *hnode = NULL;
87
88 if (hashlist_find(head, count, tgid))
89 return false;
90 hnode = kmalloc(sizeof(struct hashlist_node), GFP_ATOMIC);
91 if (!hnode)
92 return false;
93 INIT_HLIST_NODE(&hnode->list);
94 hnode->pid = tgid;
95 hlist_add_head(&hnode->list, &head[tgid % count]);
96 return true;
97 }
98
/*
 * Briefly drop the RCU read lock so a long task-list walk cannot
 * stall grace periods.  Both tasks are pinned with a reference across
 * the gap; returns true only if both are still alive, i.e. the walk
 * may safely continue from them.
 */
static bool rcu_lock_break(struct task_struct *g, struct task_struct *t)
{
	bool can_cont = false;

	get_task_struct(g);
	get_task_struct(t);
	rcu_read_unlock();
	cond_resched();
	rcu_read_lock();
	can_cont = pid_alive(g) && pid_alive(t);
	put_task_struct(t);
	put_task_struct(g);
	return can_cont;
}
113
rcu_break(int * max_count,int * batch_count,struct task_struct * g,struct task_struct * t)114 static bool rcu_break(int *max_count, int *batch_count,
115 struct task_struct *g,
116 struct task_struct *t)
117 {
118 if (!(*max_count)--)
119 return true;
120 if (!--(*batch_count)) {
121 *batch_count = HUNG_TASK_BATCHING;
122 if (!rcu_lock_break(g, t))
123 return true;
124 }
125 return false;
126 }
127
/*
 * Walk every thread and return the tgid of the first one whose comm
 * matches @name, or 0 if none is found before the scan budget runs out.
 */
static pid_t get_pid_by_name(const char *name)
{
	int max_count = PID_MAX_LIMIT;
	int batch_count = HUNG_TASK_BATCHING;
	struct task_struct *g = NULL;
	struct task_struct *t = NULL;
	int pid = 0;

	rcu_read_lock();
	do_each_thread(g, t) {
		/* stop when the budget is spent or the anchors died */
		if (rcu_break(&max_count, &batch_count, g, t))
			goto unlock;
		if (!strncmp(t->comm, name, TASK_COMM_LEN)) {
			pid = t->tgid;
			goto unlock;
		}
	} while_each_thread(g, t);

unlock:
	rcu_read_unlock();
	return pid;
}
150
/*
 * Classify a task by its parent's tgid (kernel / app / native) and
 * mark it white+jank if its tgid is on the whitelist.
 * @pid is currently unused but kept for interface stability.
 */
static unsigned int get_task_type(pid_t pid, pid_t tgid, struct task_struct *parent)
{
	unsigned int type = TASK_TYPE_IGNORE;

	if (parent) {
		pid_t ppid = parent->tgid;

		/* order matters: appspawn is checked before init */
		if (ppid == PID_KTHREAD)
			type |= TASK_TYPE_KERNEL;
		else if (ppid == appspawn_pid)
			type |= TASK_TYPE_APP;
		else if (ppid == PID_INIT)
			type |= TASK_TYPE_NATIVE;
	}

	if (!whitelist_empty && hashlist_find(whitelist, WHITELIST_LEN, tgid))
		type |= TASK_TYPE_WHITE | TASK_TYPE_JANK;

	return type;
}
170
/*
 * Re-resolve the cached tgid of the "appspawn" process by scanning
 * the thread list; appspawn may restart and change pid.
 */
static void refresh_appspawn_pids(void)
{
	int max_count = PID_MAX_LIMIT;
	int batch_count = HUNG_TASK_BATCHING;
	struct task_struct *g = NULL;
	struct task_struct *t = NULL;

	rcu_read_lock();
	do_each_thread(g, t) {
		if (rcu_break(&max_count, &batch_count, g, t))
			goto unlock;
		/* no early exit: the last match wins */
		if (!strncmp(t->comm, "appspawn", TASK_COMM_LEN))
			appspawn_pid = t->tgid;
	} while_each_thread(g, t);
unlock:
	rcu_read_unlock();
}
188
/*
 * Overwrite the cached task_type of every tracked item that belongs
 * to thread group @pid.
 */
static void refresh_task_type(pid_t pid, int task_type)
{
	struct rb_node *node = NULL;

	spin_lock(&list_tasks_lock);
	for (node = rb_first(&list_tasks); node; node = rb_next(node)) {
		struct task_item *item = rb_entry(node, struct task_item, node);

		if (item->tgid == pid)
			item->task_type = task_type;
	}
	spin_unlock(&list_tasks_lock);
}
202
/*
 * Rebuild the whitelist pid hash from the configured process names
 * and refresh the cached appspawn pid.  Run periodically because the
 * monitored processes may restart with new pids.
 */
static void refresh_whitelist_pids(void)
{
	int i;

	hashlist_clear(whitelist, WHITELIST_LEN);
	for (i = 0; i < WHITELIST_LEN; i++) {
		if (!strlen(whitetmplist[i].name))
			continue;
		whitetmplist[i].pid =
			get_pid_by_name(whitetmplist[i].name);
		if (!whitetmplist[i].pid)
			continue;
		/* re-tag any already-tracked items of this process */
		refresh_task_type(whitetmplist[i].pid,
				  TASK_TYPE_WHITE | TASK_TYPE_JANK);
		if (hashlist_insert(whitelist, WHITELIST_LEN,
				    whitetmplist[i].pid))
			pr_info("whitelist[%d]-%s-%d\n", i,
				whitetmplist[i].name, whitetmplist[i].pid);
		else
			pr_info("can't find %s\n", whitetmplist[i].name);
	}
	refresh_appspawn_pids();
}
226
/*
 * rbtree lookup keyed on pid.  Returns the matching task_item or
 * NULL if @pid is not tracked.
 */
static struct task_item *find_task(pid_t pid, struct rb_root *root)
{
	struct rb_node *node = root->rb_node;

	while (node) {
		struct task_item *item = rb_entry(node, struct task_item, node);

		if (!item)
			return NULL;
		if (pid < item->pid)
			node = node->rb_left;
		else if (pid > item->pid)
			node = node->rb_right;
		else
			return item;
	}
	return NULL;
}
247
/*
 * Link @item into the rbtree @root keyed by pid.
 * Returns false (after logging) if a node with the same pid already
 * exists; in that case @item is NOT linked and the caller still owns it.
 */
static bool insert_task(struct task_item *item, struct rb_root *root)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct task_item *cur = NULL;

	while (*p) {
		parent = *p;

		cur = rb_entry(parent, struct task_item, node);
		if (!cur)
			return false;
		if (item->pid < cur->pid) {
			p = &(*p)->rb_left;
		} else if (item->pid > cur->pid) {
			p = &(*p)->rb_right;
		} else {
			/* duplicate pid: refuse the insert */
			pr_info("insert pid=%d,tgid=%d,name=%s,type=%d fail\n",
				item->pid, item->tgid,
				item->name, item->task_type);
			return false;
		}
	}
	rb_link_node(&item->node, parent, p);
	rb_insert_color(&item->node, root);
	return true;
}
275
/*
 * Log one blocked task.  @taskitem may be NULL when the task is not
 * (yet) tracked in list_tasks; tracked items add block time and type.
 */
void show_block_task(struct task_item *taskitem, struct task_struct *p)
{
	unsigned long last_arrival;
	unsigned long last_queued;

#ifdef CONFIG_SCHED_INFO
	last_arrival = p->sched_info.last_arrival;
	last_queued = p->sched_info.last_queued;
#else
	last_arrival = 0;
	last_queued = 0;
#endif /* CONFIG_SCHED_INFO */
	if (unlikely(p->flags & PF_FROZEN)) {
		if (taskitem)
			pr_err("name=%s,PID=%d,tgid=%d,tgname=%s,"
				"FROZEN for %ds,type=%d,la%lu/lq%lu\n",
				p->comm, p->pid, p->tgid,
				p->group_leader->comm,
				taskitem->d_state_time * HEARTBEAT_TIME,
				taskitem->task_type,
				last_arrival, last_queued);
		else
			pr_err("name=%s,PID=%d,tgid=%d,tgname=%s,"
				"just FROZE,la%lu/lq%lu\n",
				p->comm, p->pid, p->tgid,
				p->group_leader->comm,
				last_arrival, last_queued);
	} else {
		if (taskitem)
			pr_err("name=%s,PID=%d,tgid=%d,prio=%d,cpu=%d,tgname=%s,"
				"type=%d,blocked for %ds,la%lu/lq%lu\n",
				taskitem->name, taskitem->pid, p->tgid, p->prio,
				task_cpu(p), p->group_leader->comm, taskitem->task_type,
				taskitem->d_state_time * HEARTBEAT_TIME,
				last_arrival, last_queued);
		else
			pr_err("name=%s,PID=%d,tgid=%d,prio=%d,cpu=%d,"
				"tgname=%s,la%lu/lq%lu\n",
				p->comm, p->pid, p->tgid, p->prio, task_cpu(p),
				p->group_leader->comm,
				last_arrival, last_queued);

		/* NOTE(review): only non-frozen tasks get a stack dump --
		 * presumably intentional since frozen tasks are parked */
		sched_show_task(p);
	}
}
321
/*
 * Dump every task that is running or whose state matches
 * @state_filter, annotated with hungtask tracking info where known.
 * Also shows held locks when dumping D-state (or all) tasks.
 */
void htbase_show_state_filter(unsigned long state_filter)
{
	struct task_struct *g = NULL;
	struct task_struct *p = NULL;
	struct task_item *taskitem = NULL;

#if BITS_PER_LONG == 32
	pr_info("  task                PC stack   pid father\n");
#else
	pr_info("  task                        PC stack   pid father\n");
#endif
	rcu_read_lock();
	for_each_process_thread(g, p) {
		/*
		 * reset the NMI-timeout, listing all files on a slow
		 * console might take a lot of time:
		 */
		touch_nmi_watchdog();
		if ((p->state == TASK_RUNNING) || (p->state & state_filter)) {
			spin_lock(&list_tasks_lock);
			taskitem = find_task(p->pid, &list_tasks);
			spin_unlock(&list_tasks_lock);
			show_block_task(taskitem, p);
		}
	}
	touch_all_softlockup_watchdogs();
	rcu_read_unlock();
	/* Show locks if hungtask happen */
	if ((state_filter == TASK_UNINTERRUPTIBLE) || !state_filter)
		debug_show_all_locks();
}
353
/*
 * SysRq entry point: wrap the state dump with start/end markers so
 * log parsers can extract the section.
 */
void hungtask_show_state_filter(unsigned long state_filter)
{
	pr_err("BinderChain_SysRq start\n");
	htbase_show_state_filter(state_filter);
	pr_err("BinderChain_SysRq end\n");
}
360
/* Dump one task's stack trace and the locks it currently holds. */
void do_dump_task(struct task_struct *task)
{
	sched_show_task(task);
	debug_show_held_locks(task);
}
366
do_show_task(struct task_struct * task,unsigned int flag,int d_state_time)367 void do_show_task(struct task_struct *task, unsigned int flag, int d_state_time)
368 {
369 pr_err("%s, flag=%d\n", __func__, flag);
370 rcu_read_lock();
371 if (!pid_alive(task)) {
372 rcu_read_unlock();
373 return;
374 }
375 if (flag & (FLAG_DUMP_WHITE | FLAG_DUMP_APP)) {
376 int cnt = 0;
377
378 trace_sched_process_hang(task);
379 cnt = d_state_time;
380 pr_err("INFO: task %s:%d tgid:%d blocked for %ds in %s\n",
381 task->comm, task->pid, task->tgid,
382 (HEARTBEAT_TIME * cnt),
383 (flag & FLAG_DUMP_WHITE) ? "whitelist" : "applist");
384 pr_err(" %s %s %.*s\n",
385 print_tainted(), init_utsname()->release,
386 (int)strcspn(init_utsname()->version, " "),
387 init_utsname()->version);
388 do_dump_task(task);
389 touch_nmi_watchdog();
390 if (flag & FLAG_DUMP_WHITE && (!dump_and_upload)) {
391 dump_and_upload++;
392 upload.pid = task->pid;
393 upload.tgid = task->tgid;
394 upload.duration = d_state_time;
395 memset(upload.name, 0, sizeof(upload.name));
396 strncpy(upload.name, task->comm, sizeof(upload.name));
397 upload.flag = flag;
398 if (task->flags & PF_FROZEN)
399 upload.flag = (upload.flag | FLAG_PF_FROZEN);
400 }
401 }
402 rcu_read_unlock();
403 }
404
/*
 * Panic the system if the hung_task_panic sysctl is set, after
 * collecting backtraces from every CPU.
 */
static void do_panic(void)
{
	if (sysctl_hung_task_panic) {
		trigger_all_cpu_backtrace();
		panic("hungtask: blocked tasks");
	}
}
412
create_taskitem(struct task_item * taskitem,struct task_struct * task)413 static void create_taskitem(struct task_item *taskitem,
414 struct task_struct *task)
415 {
416 taskitem->pid = task->pid;
417 taskitem->tgid = task->tgid;
418 memset(taskitem->name, 0, sizeof(taskitem->name));
419 strncpy(taskitem->name, task->comm, sizeof(taskitem->name));
420 taskitem->switch_count = task->nvcsw + task->nivcsw;
421 taskitem->dump_wa = 0; /* whitelist or applist task dump times */
422 taskitem->panic_wa = 0; /* whitelist or applist task panic times */
423 taskitem->d_state_time = -1;
424 taskitem->isdone_wa = true; /* if task in white or app dealed */
425 }
426
/*
 * Update @taskitem statistics for one heartbeat.
 * Returns true if the task has been scheduled since the last check
 * (its context-switch count moved), false if it is still stuck.
 */
static bool refresh_task(struct task_item *taskitem, struct task_struct *task)
{
	bool is_called = false;

	/* any voluntary/involuntary switch means it ran: nothing to count */
	if (taskitem->switch_count != (task->nvcsw + task->nivcsw)) {
		taskitem->switch_count = task->nvcsw + task->nivcsw;
		is_called = true;
		return is_called;
	}
	/* still stuck: advance the whitelist dump/panic counters */
	if (taskitem->task_type & TASK_TYPE_WHITE) {
		taskitem->isdone_wa = false;
		taskitem->dump_wa++;
		taskitem->panic_wa++;
	}
	taskitem->d_state_time++;
	if (task->flags & PF_FROZEN)
		taskitem->task_type |= TASK_TYPE_FROZEN;
	return is_called;
}
446
/*
 * Unlink @item from list_tasks and free it.
 * NOTE(review): call sites look inconsistent about holding
 * list_tasks_lock around this -- verify locking at each caller.
 */
static void remove_list_tasks(struct task_item *item)
{
	rb_erase(&item->node, &list_tasks);
	kfree(item);
}
452
/*
 * Queue @item for removal.  When the pending batch reaches
 * MAX_REMOVE_LIST_NUM, the whole batch (including @item; the array
 * has one spare slot for it) is removed at once and *is_finish is
 * cleared so the caller restarts its rbtree walk, since the tree was
 * just modified.
 */
static void shrink_process_item(struct task_item *item, bool *is_finish)
{
	if (remove_cnt >= MAX_REMOVE_LIST_NUM) {
		int i;

		remove_list[remove_cnt++] = item;
		for (i = 0; i < remove_cnt; i++)
			remove_list_tasks(remove_list[i]);
		remove_cnt = 0;
		*is_finish = false;
	} else {
		remove_list[remove_cnt++] = item;
	}
}
467
/*
 * Remove every tracked item whose isdone_wa flag is set.  Items are
 * batched through remove_list[]; the outer loop restarts the walk
 * whenever a batch flush modified the tree mid-iteration, and any
 * partially filled batch is flushed at the end.
 */
static void shrink_list_tasks(void)
{
	int i;
	bool is_finish = false;
	struct rb_node *n = NULL;
	struct task_item *item = NULL;

	spin_lock(&list_tasks_lock);
	while (!is_finish) {
		is_finish = true;
		for (n = rb_first(&list_tasks); n != NULL; n = rb_next(n)) {
			item = rb_entry(n, struct task_item, node);
			if (!item)
				continue;
			if (item->isdone_wa) {
				shrink_process_item(item, &is_finish);
				if (!is_finish)
					break;
			}
		}
	}
	/* flush the remaining partial batch */
	for (i = 0; i < remove_cnt; i++)
		remove_list_tasks(remove_list[i]);
	remove_cnt = 0;
	spin_unlock(&list_tasks_lock);
}
494
check_parameters(void)495 static void check_parameters(void)
496 {
497 if ((whitelist_dump_cnt < 0) ||
498 (whitelist_dump_cnt > DEFAULT_WHITE_DUMP_CNT))
499 whitelist_dump_cnt = DEFAULT_WHITE_DUMP_CNT;
500 if ((whitelist_panic_cnt <= 0) ||
501 (whitelist_panic_cnt > DEFAULT_WHITE_PANIC_CNT))
502 whitelist_panic_cnt = DEFAULT_WHITE_PANIC_CNT;
503 }
504
/* Workqueue handler: forward the prepared report text to zerohung. */
static void send_work_handler(struct work_struct *data)
{
#ifdef CONFIG_DFX_ZEROHUNG
	zrhung_send_event(HUNGTASK_DOMAIN, HUNGTASK_NAME,
			  report_buf_text);
#endif
}
512
/*
 * Dump all D-state tasks, close the report section tagged
 * @report_buf_tag and schedule the asynchronous zerohung upload.
 */
static void htbase_report_zrhung_event(const char *report_buf_tag)
{
	htbase_show_state_filter(TASK_UNINTERRUPTIBLE);
	pr_err("%s end\n", report_buf_tag);
	schedule_work(&send_work);
	report_zrhung_id++;
}
520
htbase_report_zrhung(unsigned int event)521 static void htbase_report_zrhung(unsigned int event)
522 {
523 bool report_load = false;
524 char report_buf_tag[REPORT_MSGLENGTH] = {0};
525 char report_name[TASK_COMM_LEN + 1] = {0};
526 int report_pid = 0;
527 int report_hungtime = 0;
528 int report_tasktype = 0;
529
530 if (!event)
531 return;
532 if (event & HUNGTASK_EVENT_WHITELIST) {
533 snprintf(report_buf_tag, sizeof(report_buf_tag),
534 "hungtask_whitelist_%d", report_zrhung_id);
535 strncpy(report_name, upload.name, TASK_COMM_LEN);
536 report_pid = upload.pid;
537 report_tasktype = TASK_TYPE_WHITE;
538 report_hungtime = whitelist_dump_cnt * HEARTBEAT_TIME;
539 report_load = true;
540 } else {
541 pr_err("No such event report to zerohung!");
542 }
543 pr_err("%s start\n", report_buf_tag);
544 if (event & HUNGTASK_EVENT_WHITELIST)
545 pr_err("report HUNGTASK_EVENT_WHITELIST to zrhung\n");
546 if (upload.flag & FLAG_PF_FROZEN)
547 snprintf(report_buf_text, sizeof(report_buf_text),
548 "Task %s(%s) pid %d type %d blocked %ds.",
549 report_name, "FROZEN", report_pid, report_tasktype, report_hungtime);
550 else
551 snprintf(report_buf_text, sizeof(report_buf_text),
552 "Task %s pid %d type %d blocked %ds.",
553 report_name, report_pid, report_tasktype, report_hungtime);
554 if (report_load)
555 htbase_report_zrhung_event(report_buf_tag);
556 }
557
/*
 * Append @pid to the one-line frozen-pid summary, emitting the
 * "FROZEN Pid:" header first if this is the first entry.
 * Returns the number of buffer bytes used so far, or -1 on snprintf
 * failure; the caller flushes the buffer when it reports (nearly) full.
 */
static int print_frozen_list_item(int pid)
{
	int tmp;

	if (!frozed_head) {
		tmp = snprintf(frozen_buf, FROZEN_BUF_LEN, "%s", "FROZEN Pid:");
		if (tmp < 0)
			return -1;
		/* snprintf returns the would-be length; clamp to what fit */
		frozen_used += min(tmp, FROZEN_BUF_LEN - 1);
		frozed_head = true;
	}
	tmp = snprintf(frozen_buf + frozen_used, FROZEN_BUF_LEN - frozen_used, "%d,",
		       pid);
	if (tmp < 0)
		return -1;
	frozen_used += min(tmp, FROZEN_BUF_LEN - frozen_used - 1);
	return frozen_used;
}
576
/*
 * Possibly dump one whitelist/applist task.
 * Once the block time passes 2 minutes (then 10 minutes, then 1 hour)
 * dumps are rate-limited to exact multiples of those intervals.
 * Frozen tasks are only collected into the frozen-pid summary line;
 * others are dumped in full the first time per report cycle.
 * Returns the number of tasks actually dumped (0 or 1).
 */
int dump_task_wa(struct task_item *item, int dump_cnt,
		 struct task_struct *task, unsigned int flag)
{
	int ret = 0;

	if ((item->d_state_time > TWO_MINUTES) &&
	    (item->d_state_time % TWO_MINUTES != 0))
		return ret;
	if ((item->d_state_time > HUNG_TEN_MINUTES) &&
	    (item->d_state_time % HUNG_TEN_MINUTES != 0))
		return ret;
	if ((item->d_state_time > HUNG_ONE_HOUR) &&
	    (item->d_state_time % HUNG_ONE_HOUR != 0))
		return ret;
	if (dump_cnt && (item->dump_wa > dump_cnt)) {
		item->dump_wa = 1;
		if (!dump_and_upload && task->flags & PF_FROZEN) {
			int tmp = print_frozen_list_item(item->pid);
			if (tmp < 0)
				return ret;
			/* summary line full: flush it and start a new one */
			if (tmp >= FROZEN_BUF_LEN - 1) {
				pr_err("%s", frozen_buf);
				memset(frozen_buf, 0, sizeof(frozen_buf));
				frozen_used = 0;
				frozed_head = false;
				print_frozen_list_item(item->pid);
			}
		} else if (!dump_and_upload) {
			pr_err("Ready to dump a task %s\n", item->name);
			do_show_task(task, flag, item->d_state_time);
			ret++;
		}
	}
	return ret;
}
612
update_panic_task(struct task_item * item)613 static void update_panic_task(struct task_item *item)
614 {
615 if (upload.pid != 0)
616 return;
617
618 upload.pid = item->pid;
619 upload.tgid = item->tgid;
620 memset(upload.name, 0, sizeof(upload.name));
621 strncpy(upload.name, item->name, sizeof(upload.name));
622 }
623
/*
 * Per-heartbeat handling for one tracked task.  If it ran since the
 * last check (@is_called) its counters reset; otherwise dump it when
 * the dump threshold is crossed and arm a panic when the panic
 * threshold is crossed.  Items with nothing pending are dropped from
 * the tree.
 */
static void deal_task(struct task_item *item, struct task_struct *task, bool is_called)
{
	int any_dumped_num = 0;

	if (is_called) {
		item->dump_wa = 1;
		item->panic_wa = 1;
		item->d_state_time = 0;
		return;
	}
	if (item->task_type & TASK_TYPE_WHITE)
		any_dumped_num = dump_task_wa(item, whitelist_dump_cnt, task,
					      FLAG_DUMP_WHITE);
	/* is_called is known false here; kept for symmetry with above */
	if (!is_called && (item->task_type & TASK_TYPE_WHITE)) {
		if (whitelist_panic_cnt && item->panic_wa > whitelist_panic_cnt) {
			pr_err("Task %s is causing panic\n", item->name);
			update_panic_task(item);
			item->panic_wa = 0;
			hung_task_must_panic++;
		} else {
			item->isdone_wa = false;
		}
	}
	if (item->isdone_wa)
		remove_list_tasks(item);
}
650
check_conditions(struct task_struct * task,unsigned int task_type)651 static bool check_conditions(struct task_struct *task, unsigned int task_type)
652 {
653 bool no_check = true;
654
655 if (task->flags & PF_FROZEN)
656 return no_check;
657 if (task_type & TASK_TYPE_WHITE &&
658 (whitelist_dump_cnt || whitelist_panic_cnt))
659 no_check = false;
660 return no_check;
661 }
662
htbase_check_one_task(struct task_struct * t)663 static void htbase_check_one_task(struct task_struct *t)
664 {
665 unsigned int task_type = TASK_TYPE_IGNORE;
666 unsigned long switch_count = t->nvcsw + t->nivcsw;
667 struct task_item *taskitem = NULL;
668 bool is_called = false;
669
670 if (unlikely(!switch_count)) {
671 pr_info("skip one's switch_count is zero\n");
672 return;
673 }
674
675 taskitem = find_task(t->pid, &list_tasks);
676 if (taskitem) {
677 if (check_conditions(t, taskitem->task_type))
678 return;
679 is_called = refresh_task(taskitem, t);
680 } else {
681 task_type = get_task_type(t->pid, t->tgid, t->real_parent);
682 if (check_conditions(t, task_type))
683 return;
684 taskitem = kmalloc(sizeof(*taskitem), GFP_ATOMIC);
685 if (!taskitem) {
686 pr_err("kmalloc failed");
687 return;
688 }
689 memset(taskitem, 0, sizeof(*taskitem));
690 taskitem->task_type = task_type;
691 create_taskitem(taskitem, t);
692 is_called = refresh_task(taskitem, t);
693 insert_task(taskitem, &list_tasks);
694 }
695 deal_task(taskitem, t, is_called);
696 }
697
/*
 * Heartbeat prologue: sync the timeout from sysctl, then periodically
 * (every REFRESH_INTERVAL beats, plus the first few beats after boot)
 * re-resolve whitelist pids and re-validate the tunables.
 */
static void htbase_pre_process(void)
{
	htbase_set_timeout_secs(sysctl_hung_task_timeout_secs);
	cur_heartbeat++;
	if ((cur_heartbeat % REFRESH_INTERVAL) == 0)
		do_refresh = 1;
	else
		do_refresh = 0;
	if (do_refresh || (cur_heartbeat < TIME_REFRESH_PIDS))
		{
		refresh_whitelist_pids();
		check_parameters();
	}
}
711
/*
 * Heartbeat epilogue: flush the frozen-pid summary, fire the zerohung
 * report once per cycle, trigger a panic if armed, and prune/reset
 * the tracked-task tree for the next beat.
 */
static void htbase_post_process(void)
{
	struct rb_node *n = NULL;
	unsigned int hungevent = 0;

	/* flush the accumulated "FROZEN Pid:..." line, if any */
	if (frozen_used) {
		pr_err("%s", frozen_buf);
		memset(frozen_buf, 0, sizeof(frozen_buf));
		frozen_used = 0;
		frozed_head = false;
	}
	/* first beat after a whitelist dump: raise the event exactly once */
	if (dump_and_upload == HUNG_TASK_UPLOAD_ONCE) {
		hungevent |= HUNGTASK_EVENT_WHITELIST;
		dump_and_upload++;
	}
	/* re-arm dumping after the panic/dump window has elapsed */
	if (dump_and_upload > 0) {
		time_since_upload++;
		if (time_since_upload > (whitelist_panic_cnt - whitelist_dump_cnt)) {
			dump_and_upload = 0;
			time_since_upload = 0;
		}
	}
	if (hung_task_must_panic) {
		htbase_show_state_filter(TASK_UNINTERRUPTIBLE);
		hung_task_must_panic = 0;
		pr_err("Task %s:%d blocked for %ds is causing panic\n",
			upload.name, upload.pid,
			whitelist_panic_cnt * HEARTBEAT_TIME);
		do_panic();
	}
	htuser_post_process_userlist();
	shrink_list_tasks();
	/* mark everything done; the next scan clears the flag on live hangs */
	for (n = rb_first(&list_tasks); n != NULL; n = rb_next(n)) {
		struct task_item *item = rb_entry(n, struct task_item, node);
		item->isdone_wa = true;
	}

	if (hungevent)
		htbase_report_zrhung(hungevent);
}
752
/*
 * Main entry, invoked from the hung-task watchdog each heartbeat:
 * scan all threads and track those in uninterruptible or killable
 * sleep.  Skipped entirely while disabled or once a panic is underway.
 */
void htbase_check_tasks(unsigned long timeout)
{
	int max_count = PID_MAX_LIMIT;
	int batch_count = HUNG_TASK_BATCHING;
	struct task_struct *g = NULL;
	struct task_struct *t = NULL;

	if (!hungtask_enable)
		return;
	if (test_taint(TAINT_DIE) || did_panic) {
		pr_err("already in doing panic\n");
		return;
	}

	htbase_pre_process();
	rcu_read_lock();
	for_each_process_thread(g, t) {
		/* bound the scan and periodically drop the RCU lock */
		if (!max_count--)
			goto unlock;
		if (!--batch_count) {
			batch_count = HUNG_TASK_BATCHING;
			if (!rcu_lock_break(g, t))
				goto unlock;
		}
		if ((t->state == TASK_UNINTERRUPTIBLE) ||
		    (t->state == TASK_KILLABLE))
			htbase_check_one_task(t);
	}
unlock:
	rcu_read_unlock();
	htbase_post_process();
}
785
htbase_enable_show(struct kobject * kobj,struct kobj_attribute * attr,char * buf)786 static ssize_t htbase_enable_show(struct kobject *kobj,
787 struct kobj_attribute *attr,
788 char *buf)
789 {
790 if (hungtask_enable)
791 return snprintf(buf, ENABLE_SHOW_LEN, "on\n");
792 else
793 return snprintf(buf, ENABLE_SHOW_LEN, "off\n");
794 }
795
htbase_enable_store(struct kobject * kobj,struct kobj_attribute * attr,const char * buf,size_t count)796 static ssize_t htbase_enable_store(struct kobject *kobj,
797 struct kobj_attribute *attr,
798 const char *buf, size_t count)
799 {
800 char tmp[6]; /* only storage "on" "off" "kick" and enter */
801 size_t len;
802 char *p = NULL;
803
804 if (!buf)
805 return -EINVAL;
806 if ((count < 2) || (count > (sizeof(tmp) - 1))) {
807 pr_err("string too long or too short\n");
808 return -EINVAL;
809 }
810
811 p = memchr(buf, '\n', count);
812 len = p ? (size_t)(p - buf) : count;
813 memset(tmp, 0, sizeof(tmp));
814 strncpy(tmp, buf, len);
815 if (!strncmp(tmp, "on", strlen(tmp))) {
816 hungtask_enable = HT_ENABLE;
817 pr_info("set hungtask_enable to enable\n");
818 } else if (!strncmp(tmp, "off", strlen(tmp))) {
819 hungtask_enable = HT_DISABLE;
820 pr_info("set hungtask_enable to disable\n");
821 } else {
822 pr_err("only accept on or off\n");
823 }
824 return (ssize_t) count;
825 }
826
/*
 * sysfs read of /sys/kernel/hungtask/monitorlist: print the list type
 * and the currently resolved "name-pid" pairs.
 */
static ssize_t htbase_monitorlist_show(struct kobject *kobj,
				       struct kobj_attribute *attr,
				       char *buf)
{
	int i;
	char *start = buf;
	char all_buf[WHITELIST_STORE_LEN - 20]; /* exclude extra header len 20*/
	unsigned long len = 0;

	memset(all_buf, 0, sizeof(all_buf));
	for (i = 0; i < WHITELIST_LEN; i++) {
		if (whitetmplist[i].pid > 0) {
			len += snprintf(all_buf + len, sizeof(all_buf) - len,
					"%s-%d,", whitetmplist[i].name, whitetmplist[i].pid);
			/* snprintf returns would-be length: stop on truncation */
			if (!(len < sizeof(all_buf))) {
				len = sizeof(all_buf) - 1;
				break;
			}
		}
	}
	if (len > 0)
		all_buf[len] = 0;
	if (whitelist_type == WHITE_LIST)
		buf += snprintf(buf, WHITELIST_STORE_LEN, "whitelist:[%s]\n", all_buf);
	else if (whitelist_type == BLACK_LIST)
		buf += snprintf(buf, WHITELIST_STORE_LEN, "blacklist:[%s]\n", all_buf);
	else
		buf += snprintf(buf, WHITELIST_STORE_LEN, "\n");
	return buf - start;
}
857
htbase_monitorlist_update(char ** cur)858 static void htbase_monitorlist_update(char **cur)
859 {
860 int index = 0;
861 char *token = NULL;
862
863 hashlist_clear(whitelist, WHITELIST_LEN);
864 memset(whitetmplist, 0, sizeof(whitetmplist));
865 /* generate the new whitelist */
866 for (; ; ) {
867 token = strsep(cur, ",");
868 if (token && strlen(token)) {
869 strncpy(whitetmplist[index].name, token, TASK_COMM_LEN);
870 if (strlen(whitetmplist[index].name) > 0)
871 whitelist_empty = false;
872 index++;
873 if (index >= WHITELIST_LEN)
874 break;
875 }
876 if (!(*cur))
877 break;
878 }
879 }
880
/*
 * monitorlist_store - Called when 'write/echo' method is
 * used on entry '/sys/kernel/hungtask/monitorlist'.
 *
 * Expected input: "whitelist,name1,name2,..." with an optional
 * trailing newline; "blacklist" is recognized but rejected.
 */
static ssize_t htbase_monitorlist_store(struct kobject *kobj,
					struct kobj_attribute *attr,
					const char *buf, size_t n)
{
	size_t len;
	char *p = NULL;
	char all_buf[WHITELIST_STORE_LEN];
	char *cur = all_buf;


	if ((n < 2) || (n > (sizeof(all_buf) - 1))) {
		pr_err("whitelist input string illegal\n");
		return -EINVAL;
	}
	if (!buf)
		return -EINVAL;
	/*
	 * input format:
	 * write /sys/kernel/hungtask/monitorlist "whitelist,
	 * system_server,surfaceflinger"
	 */
	p = memchr(buf, '\n', n);
	len = p ? (size_t)(p - buf) : n; /* exclude the '\n' */

	memset(all_buf, 0, sizeof(all_buf));
	len = len > WHITELIST_STORE_LEN ? WHITELIST_STORE_LEN : len;
	strncpy(all_buf, buf, len);
	/* first token selects the list type */
	p = strsep(&cur, ",");
	if (!cur) {
		pr_err("string is not correct\n");
		return -EINVAL;
	}
	if (!strncmp(p, "whitelist", n)) {
		whitelist_type = WHITE_LIST;
	} else {
		if (!strncmp(p, "blacklist", n))
			pr_err("blacklist is not support\n");
		else
			pr_err("wrong list type is set\n");
		return -EINVAL;
	}
	if (!strlen(cur)) {
		pr_err("at least one process need to be set\n");
		return -EINVAL;
	}
	pr_err("whitelist is %s\n", cur);

	htbase_monitorlist_update(&cur);
	/* check again in case user input "whitelist,,,,,," */
	if (whitelist_empty) {
		pr_err("at least one process need to be set\n");
		return -EINVAL;
	}
	return (ssize_t) n;
}
940
/* used for sysctl at "/proc/sys/kernel/hung_task_timeout_secs" */
/*
 * Accept only values that are multiples of HEARTBEAT_TIME and no
 * larger than the compile-time default, then derive the whitelist
 * panic and dump thresholds (in heartbeats) from the new timeout.
 */
void htbase_set_timeout_secs(unsigned long new_hungtask_timeout_secs)
{
	if ((new_hungtask_timeout_secs > CONFIG_DEFAULT_HUNG_TASK_TIMEOUT) ||
	    (new_hungtask_timeout_secs % HEARTBEAT_TIME))
		return;
	hungtask_timeout_secs = new_hungtask_timeout_secs;
	/*
	 * if user change panic timeout value, we sync it to dump value
	 * defaultly, user can set it diffrently
	 */
	whitelist_panic_cnt = (int)(hungtask_timeout_secs / HEARTBEAT_TIME);
	if (whitelist_panic_cnt > THIRTY_SECONDS)
		whitelist_dump_cnt = whitelist_panic_cnt / HT_DUMP_IN_PANIC_LOOSE;
	else
		whitelist_dump_cnt = whitelist_panic_cnt / HT_DUMP_IN_PANIC_STRICT;
}
958
/* Record that a panic is in progress so the next scan bails out early. */
void htbase_set_panic(int new_did_panic)
{
	did_panic = new_did_panic;
}
963
/* /sys/kernel/hungtask/enable: switch the detector on/off */
static struct kobj_attribute timeout_attribute = {
	.attr = {
		.name = "enable",
		.mode = 0640,
	},
	.show = htbase_enable_show,
	.store = htbase_enable_store,
};

/* /sys/kernel/hungtask/monitorlist: configure the whitelist */
static struct kobj_attribute monitorlist_attr = {
	.attr = {
		.name = "monitorlist",
		.mode = 0640,
	},
	.show = htbase_monitorlist_show,
	.store = htbase_monitorlist_store,
};

#ifdef CONFIG_DFX_HUNGTASK_USER
/* /sys/kernel/hungtask/userlist: userspace-driven watch list */
static struct kobj_attribute userlist_attr = {
	.attr = {
		.name = "userlist",
		.mode = 0640,
	},
	.show = htuser_list_show,
	.store = htuser_list_store,
};
#endif

static struct attribute *attrs[] = {
	&timeout_attribute.attr,
	&monitorlist_attr.attr,
#ifdef CONFIG_DFX_HUNGTASK_USER
	&userlist_attr.attr,
#endif
	NULL
};

static struct attribute_group hungtask_attr_group = {
	.attrs = attrs,
};

/* parent kobject for the sysfs entries above */
static struct kobject *hungtask_kobj;
htbase_create_sysfs(void)1007 int htbase_create_sysfs(void)
1008 {
1009 int i;
1010 int ret;
1011
1012 /* sleep 1000ms and wait /sys/kernel ready */
1013 while (!kernel_kobj)
1014 msleep(1000);
1015
1016 /* Create kobject named "hungtask" located at /sys/kernel/huangtask */
1017 hungtask_kobj = kobject_create_and_add("hungtask", kernel_kobj);
1018 if (!hungtask_kobj)
1019 return -ENOMEM;
1020 ret = sysfs_create_group(hungtask_kobj, &hungtask_attr_group);
1021 if (ret)
1022 kobject_put(hungtask_kobj);
1023
1024 for (i = 0; i < WHITELIST_LEN; i++)
1025 INIT_HLIST_HEAD(&whitelist[i]);
1026 memset(whitetmplist, 0, sizeof(whitetmplist));
1027
1028 INIT_WORK(&send_work, send_work_handler);
1029
1030 return ret;
1031 }
1032