// SPDX-License-Identifier: GPL-2.0

/*
 * Stack trace utility functions etc.
 *
 * Copyright 2008 Christoph Hellwig, IBM Corp.
 * Copyright 2018 SUSE Linux GmbH
 * Copyright 2018 Nick Piggin, Michael Ellerman, IBM Corp.
 */

#include <linux/delay.h>
#include <linux/export.h>
#include <linux/kallsyms.h>
#include <linux/module.h>
#include <linux/nmi.h>
#include <linux/sched.h>
#include <linux/sched/debug.h>
#include <linux/sched/task_stack.h>
#include <linux/stacktrace.h>
#include <asm/ptrace.h>
#include <asm/processor.h>
#include <linux/ftrace.h>
#include <asm/kprobes.h>

#include <asm/paca.h>

/*
 * Save stack-backtrace addresses into a stack_trace buffer.
 */
static void save_context_stack(struct stack_trace *trace, unsigned long sp,
			struct task_struct *tsk, int savesched)
{
	for (;;) {
		unsigned long *stack = (unsigned long *) sp;
		unsigned long newsp, ip;

		if (!validate_sp(sp, tsk, STACK_FRAME_OVERHEAD))
			return;

		newsp = stack[0];
		ip = stack[STACK_FRAME_LR_SAVE];

		if (savesched || !in_sched_functions(ip)) {
			if (!trace->skip)
				trace->entries[trace->nr_entries++] = ip;
			else
				trace->skip--;
		}

		if (trace->nr_entries >= trace->max_entries)
			return;

		sp = newsp;
	}
}

void save_stack_trace(struct stack_trace *trace)
{
	unsigned long sp;

	sp = current_stack_frame();

	save_context_stack(trace, sp, current, 1);
}
EXPORT_SYMBOL_GPL(save_stack_trace);
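
/*
 * Illustrative sketch, not part of the original file: a minimal caller
 * capturing and printing the current task's kernel stack. The helper
 * name example_dump_current_stack() and the 16-entry buffer are
 * arbitrary choices; in-tree users normally go through the generic
 * stack_trace_save() wrappers rather than calling this export
 * directly, and stack_trace_print() is assumed from
 * <linux/stacktrace.h>.
 */
static __maybe_unused void example_dump_current_stack(void)
{
	unsigned long entries[16];
	struct stack_trace trace = {
		.entries	= entries,
		.max_entries	= ARRAY_SIZE(entries),
		.skip		= 1,	/* omit this helper's own frame */
	};

	save_stack_trace(&trace);
	stack_trace_print(trace.entries, trace.nr_entries, 0);
}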

void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
{
	unsigned long sp;

	if (!try_get_task_stack(tsk))
		return;

	if (tsk == current)
		sp = current_stack_frame();
	else
		sp = tsk->thread.ksp;

	save_context_stack(trace, sp, tsk, 0);

	put_task_stack(tsk);
}
EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
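
/*
 * Illustrative sketch, not part of the original file: the task-based
 * variant pairs naturally with a pid lookup. find_task_by_vpid()
 * requires rcu_read_lock(); the helper name example_dump_pid_stack()
 * is hypothetical.
 */
static __maybe_unused void example_dump_pid_stack(pid_t pid)
{
	unsigned long entries[16];
	struct stack_trace trace = {
		.entries	= entries,
		.max_entries	= ARRAY_SIZE(entries),
	};
	struct task_struct *tsk;

	rcu_read_lock();
	tsk = find_task_by_vpid(pid);
	if (tsk)
		save_stack_trace_tsk(tsk, &trace);
	rcu_read_unlock();
}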

void
save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
{
	save_context_stack(trace, regs->gpr[1], current, 0);
}
EXPORT_SYMBOL_GPL(save_stack_trace_regs);
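
/*
 * Illustrative sketch, not part of the original file: the regs-based
 * variant suits interrupt-time sampling, where a profiler starts the
 * unwind from a saved register state rather than the live SP. Assumes
 * <asm/irq_regs.h> for get_irq_regs(); example_sample_from_irq() is a
 * hypothetical name.
 */
static __maybe_unused void example_sample_from_irq(void)
{
	struct pt_regs *regs = get_irq_regs();
	unsigned long entries[16];
	struct stack_trace trace = {
		.entries	= entries,
		.max_entries	= ARRAY_SIZE(entries),
	};

	if (regs)
		save_stack_trace_regs(regs, &trace);
}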

#ifdef CONFIG_HAVE_RELIABLE_STACKTRACE
/*
 * This function returns an error if it detects any unreliable features of the
 * stack. Otherwise it guarantees that the stack trace is reliable.
 *
 * If the task is not 'current', the caller *must* ensure the task is inactive.
 */
static int __save_stack_trace_tsk_reliable(struct task_struct *tsk,
					   struct stack_trace *trace)
{
	unsigned long sp;
	unsigned long newsp;
	unsigned long stack_page = (unsigned long)task_stack_page(tsk);
	unsigned long stack_end;
	int graph_idx = 0;
	bool firstframe;

	stack_end = stack_page + THREAD_SIZE;
	if (!is_idle_task(tsk)) {
		/*
		 * For user tasks, this is the SP value loaded on
		 * kernel entry, see "PACAKSAVE(r13)" in _switch() and
		 * system_call_common()/EXCEPTION_PROLOG_COMMON().
		 *
		 * Likewise for non-swapper kernel threads,
		 * this also happens to be the top of the stack
		 * as set up by copy_thread().
		 *
		 * Note that stack backlinks are not properly set up by
		 * copy_thread() and thus, a forked task will have
		 * an unreliable stack trace until it's been
		 * _switch()'ed to for the first time.
		 */
		stack_end -= STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
	} else {
		/*
		 * idle tasks have a custom stack layout,
		 * c.f. cpu_idle_thread_init().
		 */
		stack_end -= STACK_FRAME_OVERHEAD;
	}

	if (tsk == current)
		sp = current_stack_frame();
	else
		sp = tsk->thread.ksp;

	if (sp < stack_page + sizeof(struct thread_struct) ||
	    sp > stack_end - STACK_FRAME_MIN_SIZE) {
		return -EINVAL;
	}

	for (firstframe = true; sp != stack_end;
	     firstframe = false, sp = newsp) {
		unsigned long *stack = (unsigned long *) sp;
		unsigned long ip;

		/* Sanity check: the ABI requires SP to be 16-byte aligned. */
		if (sp & 0xF)
			return -EINVAL;

		newsp = stack[0];
		/* The stack grows downwards; the unwinder may only go up. */
		if (newsp <= sp)
			return -EINVAL;

		if (newsp != stack_end &&
		    newsp > stack_end - STACK_FRAME_MIN_SIZE) {
			return -EINVAL; /* invalid backlink, too far up. */
		}

		/*
		 * We can only trust the bottom frame's backlink; the
		 * rest of the frame may be uninitialized. Continue to
		 * the next frame.
		 */
		if (firstframe)
			continue;

		/* Mark stacktraces with exception frames as unreliable. */
		if (sp <= stack_end - STACK_INT_FRAME_SIZE &&
		    stack[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) {
			return -EINVAL;
		}

		/* Examine the saved LR: it must point into kernel code. */
		ip = stack[STACK_FRAME_LR_SAVE];
		if (!__kernel_text_address(ip))
			return -EINVAL;

		/*
		 * FIXME: IMHO these tests do not belong in
		 * arch-dependent code, they are generic.
		 */
		ip = ftrace_graph_ret_addr(tsk, &graph_idx, ip, stack);
#ifdef CONFIG_KPROBES
		/*
		 * Mark stacktraces with kretprobed functions on them
		 * as unreliable.
		 */
		if (ip == (unsigned long)kretprobe_trampoline)
			return -EINVAL;
#endif

		if (trace->nr_entries >= trace->max_entries)
			return -E2BIG;
		if (!trace->skip)
			trace->entries[trace->nr_entries++] = ip;
		else
			trace->skip--;
	}
	return 0;
}

int save_stack_trace_tsk_reliable(struct task_struct *tsk,
				  struct stack_trace *trace)
{
	int ret;

	/*
	 * If the task doesn't have a stack (e.g., a zombie), the stack is
	 * "reliably" empty.
	 */
	if (!try_get_task_stack(tsk))
		return 0;

	ret = __save_stack_trace_tsk_reliable(tsk, trace);

	put_task_stack(tsk);

	return ret;
}
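
/*
 * Illustrative sketch, not part of the original file: how a
 * consistency checker in the livepatch style might use the reliable
 * variant, treating any non-zero return (-EINVAL for an unreliable
 * stack, -E2BIG for a too-small buffer) as "do not patch this task
 * yet". example_task_is_patchable() is a hypothetical helper; real
 * livepatch code goes through the generic
 * stack_trace_save_tsk_reliable() wrapper.
 */
static __maybe_unused bool example_task_is_patchable(struct task_struct *tsk)
{
	unsigned long entries[32];
	struct stack_trace trace = {
		.entries	= entries,
		.max_entries	= ARRAY_SIZE(entries),
	};

	return save_stack_trace_tsk_reliable(tsk, &trace) == 0;
}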
#endif /* CONFIG_HAVE_RELIABLE_STACKTRACE */

#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_NMI_IPI)
static void handle_backtrace_ipi(struct pt_regs *regs)
{
	nmi_cpu_backtrace(regs);
}

static void raise_backtrace_ipi(cpumask_t *mask)
{
	struct paca_struct *p;
	unsigned int cpu;
	u64 delay_us;

	for_each_cpu(cpu, mask) {
		if (cpu == smp_processor_id()) {
			handle_backtrace_ipi(NULL);
			continue;
		}

		delay_us = 5 * USEC_PER_SEC;

		if (smp_send_safe_nmi_ipi(cpu, handle_backtrace_ipi, delay_us)) {
			// Now wait up to 5s for the other CPU to do its backtrace
			while (cpumask_test_cpu(cpu, mask) && delay_us) {
				udelay(1);
				delay_us--;
			}

			// Other CPU cleared itself from the mask
			if (delay_us)
				continue;
		}

		p = paca_ptrs[cpu];

		cpumask_clear_cpu(cpu, mask);

		pr_warn("CPU %d didn't respond to backtrace IPI, inspecting paca.\n", cpu);
		if (!virt_addr_valid(p)) {
			pr_warn("paca pointer appears corrupt? (%px)\n", p);
			continue;
		}

		pr_warn("irq_soft_mask: 0x%02x in_mce: %d in_nmi: %d",
			p->irq_soft_mask, p->in_mce, p->in_nmi);

		if (virt_addr_valid(p->__current))
			pr_cont(" current: %d (%s)\n", p->__current->pid,
				p->__current->comm);
		else
			pr_cont(" current pointer corrupt? (%px)\n", p->__current);

		pr_warn("Back trace of paca->saved_r1 (0x%016llx) (possibly stale):\n", p->saved_r1);
		show_stack(p->__current, (unsigned long *)p->saved_r1, KERN_WARNING);
	}
}

void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
{
	nmi_trigger_cpumask_backtrace(mask, exclude_self, raise_backtrace_ipi);
}
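
/*
 * Illustrative sketch, not part of the original file: a watchdog-style
 * caller asking every other online CPU to dump its stack. On this
 * config the trigger_allbutself_cpu_backtrace() helper in
 * <linux/nmi.h> reaches arch_trigger_cpumask_backtrace() above;
 * calling it directly with cpu_online_mask and exclude_self=true is
 * the equivalent spelled out.
 */
static __maybe_unused void example_backtrace_other_cpus(void)
{
	arch_trigger_cpumask_backtrace(cpu_online_mask, true);
}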
#endif /* defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_NMI_IPI) */