// SPDX-License-Identifier: GPL-2.0
#include <linux/debugfs.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/memblock.h>
#include <linux/stacktrace.h>
#include <linux/page_owner.h>
#include <linux/jump_label.h>
#include <linux/migrate.h>
#include <linux/stackdepot.h>
#include <linux/seq_file.h>
#include <linux/memcontrol.h>
#include <linux/sched/clock.h>

#include "internal.h"

/*
 * TODO: teach PAGE_OWNER_STACK_DEPTH (__dump_page_owner and save_stack)
 * to use off-stack temporary storage
 */
#define PAGE_OWNER_STACK_DEPTH (16)

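/*
 * Per-page metadata recorded by page_owner: the allocation order, GFP mask,
 * stack depot handles for the allocation and free stacks, timestamps, and
 * the pid/tgid/comm of the allocating and freeing tasks.
 */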
struct page_owner {
	unsigned short order;
	short last_migrate_reason;
	gfp_t gfp_mask;
	depot_stack_handle_t handle;
	depot_stack_handle_t free_handle;
	u64 ts_nsec;
	u64 free_ts_nsec;
	char comm[TASK_COMM_LEN];
	pid_t pid;
	pid_t tgid;
	pid_t free_pid;
	pid_t free_tgid;
};

static bool page_owner_enabled __initdata;
DEFINE_STATIC_KEY_FALSE(page_owner_inited);

static depot_stack_handle_t dummy_handle;
static depot_stack_handle_t failure_handle;
static depot_stack_handle_t early_handle;

static void init_early_allocated_pages(void);

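/* Parse the "page_owner=" boot parameter and request early stack depot init. */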
static int __init early_page_owner_param(char *buf)
{
	int ret = kstrtobool(buf, &page_owner_enabled);

	if (page_owner_enabled)
		stack_depot_request_early_init();

	return ret;
}
early_param("page_owner", early_page_owner_param);

static __init bool need_page_owner(void)
{
	return page_owner_enabled;
}

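/*
 * Save the current (shallow) stack so it can serve as a sentinel handle:
 * the same helper backs dummy_handle, failure_handle and early_handle.
 */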
static __always_inline depot_stack_handle_t create_dummy_stack(void)
{
	unsigned long entries[4];
	unsigned int nr_entries;

	nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 0);
	return stack_depot_save(entries, nr_entries, GFP_KERNEL);
}

static noinline void register_dummy_stack(void)
{
	dummy_handle = create_dummy_stack();
}

static noinline void register_failure_stack(void)
{
	failure_handle = create_dummy_stack();
}

static noinline void register_early_stack(void)
{
	early_handle = create_dummy_stack();
}

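/*
 * Register the sentinel stacks, flip the page_owner_inited static key and
 * attribute pages that were allocated before page_owner was ready.
 */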
static __init void init_page_owner(void)
{
	if (!page_owner_enabled)
		return;

	register_dummy_stack();
	register_failure_stack();
	register_early_stack();
	static_branch_enable(&page_owner_inited);
	init_early_allocated_pages();
}

struct page_ext_operations page_owner_ops = {
	.size = sizeof(struct page_owner),
	.need = need_page_owner,
	.init = init_page_owner,
	.need_shared_flags = true,
};

static inline struct page_owner *get_page_owner(struct page_ext *page_ext)
{
	return page_ext_data(page_ext, &page_owner_ops);
}

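/*
 * Return the allocation stack depot handle for the page at @pfn, or 0 if
 * page_owner is not initialized or @pfn is a tail page of a higher-order
 * allocation.
 */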
depot_stack_handle_t get_page_owner_handle(struct page_ext *page_ext, unsigned long pfn)
{
	struct page_owner *page_owner;
	depot_stack_handle_t handle;

	if (!static_branch_unlikely(&page_owner_inited))
		return 0;

	page_owner = get_page_owner(page_ext);

	/* skip handle for tail pages of higher order allocations */
	if (!IS_ALIGNED(pfn, 1 << page_owner->order))
		return 0;

	handle = READ_ONCE(page_owner->handle);
	return handle;
}
EXPORT_SYMBOL_NS_GPL(get_page_owner_handle, MINIDUMP);

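/*
 * Capture the current stack trace and store it in the stack depot, falling
 * back to the pre-registered sentinel handles on recursion or depot failure.
 */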
static noinline depot_stack_handle_t save_stack(gfp_t flags)
{
	unsigned long entries[PAGE_OWNER_STACK_DEPTH];
	depot_stack_handle_t handle;
	unsigned int nr_entries;

	/*
	 * Avoid recursion.
	 *
	 * Sometimes page metadata allocation tracking requires more
	 * memory to be allocated:
	 * - when a new stack trace is saved to the stack depot
	 * - when the backtrace itself is calculated (ia64)
	 */
	if (current->in_page_owner)
		return dummy_handle;
	current->in_page_owner = 1;

	nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 2);
	handle = stack_depot_save(entries, nr_entries, flags);
	if (!handle)
		handle = failure_handle;

	current->in_page_owner = 0;
	return handle;
}

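/*
 * Record the freeing stack, timestamp and task for every page of a freed
 * high-order block and clear its "allocated" state.
 */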
void __reset_page_owner(struct page *page, unsigned short order)
{
	int i;
	struct page_ext *page_ext;
	depot_stack_handle_t handle;
	struct page_owner *page_owner;
	u64 free_ts_nsec = local_clock();

	page_ext = page_ext_get(page);
	if (unlikely(!page_ext))
		return;

	handle = save_stack(GFP_NOWAIT | __GFP_NOWARN);
	for (i = 0; i < (1 << order); i++) {
		__clear_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags);
		page_owner = get_page_owner(page_ext);
		page_owner->free_handle = handle;
		page_owner->free_ts_nsec = free_ts_nsec;
		page_owner->free_pid = current->pid;
		page_owner->free_tgid = current->tgid;
		page_ext = page_ext_next(page_ext);
	}
	page_ext_put(page_ext);
}

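/*
 * Stamp the allocation info (stack handle, order, GFP mask, task identity,
 * timestamp) into the page_ext of every page in the allocated block.
 */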
static inline void __set_page_owner_handle(struct page_ext *page_ext,
					depot_stack_handle_t handle,
					unsigned short order, gfp_t gfp_mask)
{
	struct page_owner *page_owner;
	int i;
	u64 ts_nsec = local_clock();

	for (i = 0; i < (1 << order); i++) {
		page_owner = get_page_owner(page_ext);
		page_owner->handle = handle;
		page_owner->order = order;
		page_owner->gfp_mask = gfp_mask;
		page_owner->last_migrate_reason = -1;
		page_owner->pid = current->pid;
		page_owner->tgid = current->tgid;
		page_owner->ts_nsec = ts_nsec;
		strscpy(page_owner->comm, current->comm,
			sizeof(page_owner->comm));
		__set_bit(PAGE_EXT_OWNER, &page_ext->flags);
		__set_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags);

		page_ext = page_ext_next(page_ext);
	}
}

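/* Called from the page allocator on allocation to record the new owner. */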
noinline void __set_page_owner(struct page *page, unsigned short order,
					gfp_t gfp_mask)
{
	struct page_ext *page_ext;
	depot_stack_handle_t handle;

	handle = save_stack(gfp_mask);

	page_ext = page_ext_get(page);
	if (unlikely(!page_ext))
		return;
	__set_page_owner_handle(page_ext, handle, order, gfp_mask);
	page_ext_put(page_ext);
}

void __set_page_owner_migrate_reason(struct page *page, int reason)
{
	struct page_ext *page_ext = page_ext_get(page);
	struct page_owner *page_owner;

	if (unlikely(!page_ext))
		return;

	page_owner = get_page_owner(page_ext);
	page_owner->last_migrate_reason = reason;
	page_ext_put(page_ext);
}

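/*
 * A high-order page is being split into @nr order-0 pages; reset the
 * recorded order of each constituent page accordingly.
 */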
void __split_page_owner(struct page *page, unsigned int nr)
{
	int i;
	struct page_ext *page_ext = page_ext_get(page);
	struct page_owner *page_owner;

	if (unlikely(!page_ext))
		return;

	for (i = 0; i < nr; i++) {
		page_owner = get_page_owner(page_ext);
		page_owner->order = 0;
		page_ext = page_ext_next(page_ext);
	}
	page_ext_put(page_ext);
}

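/* Copy the owner information from @old to @newfolio during folio migration. */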
void __folio_copy_owner(struct folio *newfolio, struct folio *old)
{
	struct page_ext *old_ext;
	struct page_ext *new_ext;
	struct page_owner *old_page_owner, *new_page_owner;

	old_ext = page_ext_get(&old->page);
	if (unlikely(!old_ext))
		return;

	new_ext = page_ext_get(&newfolio->page);
	if (unlikely(!new_ext)) {
		page_ext_put(old_ext);
		return;
	}

	old_page_owner = get_page_owner(old_ext);
	new_page_owner = get_page_owner(new_ext);
	new_page_owner->order = old_page_owner->order;
	new_page_owner->gfp_mask = old_page_owner->gfp_mask;
	new_page_owner->last_migrate_reason =
		old_page_owner->last_migrate_reason;
	new_page_owner->handle = old_page_owner->handle;
	new_page_owner->pid = old_page_owner->pid;
	new_page_owner->tgid = old_page_owner->tgid;
	new_page_owner->free_pid = old_page_owner->free_pid;
	new_page_owner->free_tgid = old_page_owner->free_tgid;
	new_page_owner->ts_nsec = old_page_owner->ts_nsec;
	new_page_owner->free_ts_nsec = old_page_owner->ts_nsec;
	strcpy(new_page_owner->comm, old_page_owner->comm);

	/*
	 * We don't clear the bit on the old folio as it's going to be freed
	 * after migration. Until then, the info can be useful in case of
	 * a bug, and the overall stats will be off a bit only temporarily.
	 * Also, migrate_misplaced_transhuge_page() can still fail the
	 * migration and then we want the old folio to retain the info. But
	 * in that case we also don't need to explicitly clear the info from
	 * the new page, which will be freed.
	 */
	__set_bit(PAGE_EXT_OWNER, &new_ext->flags);
	__set_bit(PAGE_EXT_OWNER_ALLOCATED, &new_ext->flags);
	page_ext_put(new_ext);
	page_ext_put(old_ext);
}

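/*
 * For /proc/pagetypeinfo: count, per migratetype, the pageblocks in @zone
 * that contain at least one page whose allocation migratetype differs from
 * the pageblock's migratetype ("mixed" blocks).
 */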
void pagetypeinfo_showmixedcount_print(struct seq_file *m,
				       pg_data_t *pgdat, struct zone *zone)
{
	struct page *page;
	struct page_ext *page_ext;
	struct page_owner *page_owner;
	unsigned long pfn, block_end_pfn;
	unsigned long end_pfn = zone_end_pfn(zone);
	unsigned long count[MIGRATE_TYPES] = { 0, };
	int pageblock_mt, page_mt;
	int i;

	/* Scan block by block. First and last block may be incomplete */
	pfn = zone->zone_start_pfn;

	/*
	 * Walk the zone in pageblock_nr_pages steps. If a page block spans
	 * a zone boundary, it will be double counted between zones. This does
	 * not matter as the mixed block count will still be correct
	 */
	for (; pfn < end_pfn; ) {
		page = pfn_to_online_page(pfn);
		if (!page) {
			pfn = ALIGN(pfn + 1, MAX_ORDER_NR_PAGES);
			continue;
		}

		block_end_pfn = pageblock_end_pfn(pfn);
		block_end_pfn = min(block_end_pfn, end_pfn);

		pageblock_mt = get_pageblock_migratetype(page);

		for (; pfn < block_end_pfn; pfn++) {
			/* The pageblock is online, no need to recheck. */
			page = pfn_to_page(pfn);

			if (page_zone(page) != zone)
				continue;

			if (PageBuddy(page)) {
				unsigned long freepage_order;

				freepage_order = buddy_order_unsafe(page);
				if (freepage_order <= MAX_ORDER)
					pfn += (1UL << freepage_order) - 1;
				continue;
			}

			if (PageReserved(page))
				continue;

			page_ext = page_ext_get(page);
			if (unlikely(!page_ext))
				continue;

			if (!test_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags))
				goto ext_put_continue;

			page_owner = get_page_owner(page_ext);
			page_mt = gfp_migratetype(page_owner->gfp_mask);
			if (pageblock_mt != page_mt) {
				if (is_migrate_cma(pageblock_mt))
					count[MIGRATE_MOVABLE]++;
				else
					count[pageblock_mt]++;

				pfn = block_end_pfn;
				page_ext_put(page_ext);
				break;
			}
			pfn += (1UL << page_owner->order) - 1;
ext_put_continue:
			page_ext_put(page_ext);
		}
	}

	/* Print counts */
	seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
	for (i = 0; i < MIGRATE_TYPES; i++)
		seq_printf(m, "%12lu ", count[i]);
	seq_putc(m, '\n');
}

/*
 * Look up the memcg information for @page and append it to the output buffer.
 */
static inline int print_page_owner_memcg(char *kbuf, size_t count, int ret,
					 struct page *page)
{
#ifdef CONFIG_MEMCG
	unsigned long memcg_data;
	struct mem_cgroup *memcg;
	bool online;
	char name[80];

	rcu_read_lock();
	memcg_data = READ_ONCE(page->memcg_data);
	if (!memcg_data)
		goto out_unlock;

	if (memcg_data & MEMCG_DATA_OBJCGS)
		ret += scnprintf(kbuf + ret, count - ret,
				"Slab cache page\n");

	memcg = page_memcg_check(page);
	if (!memcg)
		goto out_unlock;

	online = (memcg->css.flags & CSS_ONLINE);
	cgroup_name(memcg->css.cgroup, name, sizeof(name));
	ret += scnprintf(kbuf + ret, count - ret,
			"Charged %sto %smemcg %s\n",
			PageMemcgKmem(page) ? "(via objcg) " : "",
			online ? "" : "offline ",
			name);
out_unlock:
	rcu_read_unlock();
#endif /* CONFIG_MEMCG */

	return ret;
}

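/*
 * Format one page_owner record into a kernel buffer and copy it to
 * userspace; returns the number of bytes written or a negative errno.
 */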
static ssize_t
print_page_owner(char __user *buf, size_t count, unsigned long pfn,
		struct page *page, struct page_owner *page_owner,
		depot_stack_handle_t handle)
{
	int ret, pageblock_mt, page_mt;
	char *kbuf;

	count = min_t(size_t, count, PAGE_SIZE);
	kbuf = kmalloc(count, GFP_KERNEL);
	if (!kbuf)
		return -ENOMEM;

	ret = scnprintf(kbuf, count,
			"Page allocated via order %u, mask %#x(%pGg), pid %d, tgid %d (%s), ts %llu ns, free_ts %llu ns\n",
			page_owner->order, page_owner->gfp_mask,
			&page_owner->gfp_mask, page_owner->pid,
			page_owner->tgid, page_owner->comm,
			page_owner->ts_nsec, page_owner->free_ts_nsec);

	/* Print information relevant to grouping pages by mobility */
	pageblock_mt = get_pageblock_migratetype(page);
	page_mt = gfp_migratetype(page_owner->gfp_mask);
	ret += scnprintf(kbuf + ret, count - ret,
			"PFN 0x%lx type %s Block %lu type %s Flags %pGp\n",
			pfn,
			migratetype_names[page_mt],
			pfn >> pageblock_order,
			migratetype_names[pageblock_mt],
			&page->flags);

	ret += stack_depot_snprint(handle, kbuf + ret, count - ret, 0);
	if (ret >= count)
		goto err;

	if (page_owner->last_migrate_reason != -1) {
		ret += scnprintf(kbuf + ret, count - ret,
			"Page has been migrated, last migrate reason: %s\n",
			migrate_reason_names[page_owner->last_migrate_reason]);
	}

	ret = print_page_owner_memcg(kbuf, count, ret, page);

	ret += snprintf(kbuf + ret, count - ret, "\n");
	if (ret >= count)
		goto err;

	if (copy_to_user(buf, kbuf, ret))
		ret = -EFAULT;

	kfree(kbuf);
	return ret;

err:
	kfree(kbuf);
	return -ENOMEM;
}

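/*
 * Dump the page_owner record for @page to the kernel log; used by
 * dump_page() when something looks wrong with a page.
 */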
void __dump_page_owner(const struct page *page)
{
	struct page_ext *page_ext = page_ext_get((void *)page);
	struct page_owner *page_owner;
	depot_stack_handle_t handle;
	gfp_t gfp_mask;
	int mt;

	if (unlikely(!page_ext)) {
		pr_alert("There is no page extension available.\n");
		return;
	}

	page_owner = get_page_owner(page_ext);
	gfp_mask = page_owner->gfp_mask;
	mt = gfp_migratetype(gfp_mask);

	if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags)) {
		pr_alert("page_owner info is not present (never set?)\n");
		page_ext_put(page_ext);
		return;
	}

	if (test_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags))
		pr_alert("page_owner tracks the page as allocated\n");
	else
		pr_alert("page_owner tracks the page as freed\n");

	pr_alert("page last allocated via order %u, migratetype %s, gfp_mask %#x(%pGg), pid %d, tgid %d (%s), ts %llu, free_ts %llu\n",
		 page_owner->order, migratetype_names[mt], gfp_mask, &gfp_mask,
		 page_owner->pid, page_owner->tgid, page_owner->comm,
		 page_owner->ts_nsec, page_owner->free_ts_nsec);

	handle = READ_ONCE(page_owner->handle);
	if (!handle)
		pr_alert("page_owner allocation stack trace missing\n");
	else
		stack_depot_print(handle);

	handle = READ_ONCE(page_owner->free_handle);
	if (!handle) {
		pr_alert("page_owner free stack trace missing\n");
	} else {
		pr_alert("page last free pid %d tgid %d stack trace:\n",
			  page_owner->free_pid, page_owner->free_tgid);
		stack_depot_print(handle);
	}

	if (page_owner->last_migrate_reason != -1)
		pr_alert("page has been migrated, last migrate reason: %s\n",
			migrate_reason_names[page_owner->last_migrate_reason]);
	page_ext_put(page_ext);
}

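/*
 * read() handler for /sys/kernel/debug/page_owner: scan PFNs starting at
 * *ppos, emit one record for the next currently-allocated page found and
 * store the next PFN to visit back in *ppos.
 */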
static ssize_t
read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos)
{
	unsigned long pfn;
	struct page *page;
	struct page_ext *page_ext;
	struct page_owner *page_owner;
	depot_stack_handle_t handle;

	if (!static_branch_unlikely(&page_owner_inited))
		return -EINVAL;

	page = NULL;
	if (*ppos == 0)
		pfn = min_low_pfn;
	else
		pfn = *ppos;
	/* Find a valid PFN or the start of a MAX_ORDER_NR_PAGES area */
	while (!pfn_valid(pfn) && (pfn & (MAX_ORDER_NR_PAGES - 1)) != 0)
		pfn++;

	/* Find an allocated page */
	for (; pfn < max_pfn; pfn++) {
		/*
		 * A temporary copy of the page_owner data is needed so we
		 * can avoid context switches (copy_to_user() and GFP_KERNEL
		 * allocations may sleep) while holding the RCU lock.
		 */
		struct page_owner page_owner_tmp;

		/*
		 * If the new page is in a new MAX_ORDER_NR_PAGES area,
		 * validate the area as existing, skip it if not
		 */
		if ((pfn & (MAX_ORDER_NR_PAGES - 1)) == 0 && !pfn_valid(pfn)) {
			pfn += MAX_ORDER_NR_PAGES - 1;
			continue;
		}

		page = pfn_to_page(pfn);
		if (PageBuddy(page)) {
			unsigned long freepage_order = buddy_order_unsafe(page);

			if (freepage_order <= MAX_ORDER)
				pfn += (1UL << freepage_order) - 1;
			continue;
		}

		page_ext = page_ext_get(page);
		if (unlikely(!page_ext))
			continue;

		/*
		 * Some pages could be missed by concurrent allocation or free,
		 * because we don't hold the zone lock.
		 */
		if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags))
			goto ext_put_continue;

		/*
		 * Although we do have the info about past allocation of free
		 * pages, it's not relevant for current memory usage.
		 */
		if (!test_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags))
			goto ext_put_continue;

		page_owner = get_page_owner(page_ext);

		/*
		 * Don't print "tail" pages of high-order allocations as that
		 * would inflate the stats.
		 */
		if (!IS_ALIGNED(pfn, 1 << page_owner->order))
			goto ext_put_continue;

		/*
		 * Access to page_owner->handle isn't synchronized, so be
		 * careful when accessing it.
		 */
		handle = READ_ONCE(page_owner->handle);
		if (!handle)
			goto ext_put_continue;

		/* Record the next PFN to read in the file offset */
		*ppos = pfn + 1;

		page_owner_tmp = *page_owner;
		page_ext_put(page_ext);
		return print_page_owner(buf, count, pfn, page,
				&page_owner_tmp, handle);
ext_put_continue:
		page_ext_put(page_ext);
	}

	return 0;
}

static loff_t lseek_page_owner(struct file *file, loff_t offset, int orig)
{
	switch (orig) {
	case SEEK_SET:
		file->f_pos = offset;
		break;
	case SEEK_CUR:
		file->f_pos += offset;
		break;
	default:
		return -EINVAL;
	}
	return file->f_pos;
}

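/*
 * Attribute pages that were already allocated when page_owner initialized
 * (early boot allocations) to the sentinel early_handle.
 */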
static void init_pages_in_zone(pg_data_t *pgdat, struct zone *zone)
{
	unsigned long pfn = zone->zone_start_pfn;
	unsigned long end_pfn = zone_end_pfn(zone);
	unsigned long count = 0;

	/*
	 * Walk the zone in pageblock_nr_pages steps. If a page block spans
	 * a zone boundary, it will be double counted between zones. This does
	 * not matter as the mixed block count will still be correct
	 */
	for (; pfn < end_pfn; ) {
		unsigned long block_end_pfn;

		if (!pfn_valid(pfn)) {
			pfn = ALIGN(pfn + 1, MAX_ORDER_NR_PAGES);
			continue;
		}

		block_end_pfn = pageblock_end_pfn(pfn);
		block_end_pfn = min(block_end_pfn, end_pfn);

		for (; pfn < block_end_pfn; pfn++) {
			struct page *page = pfn_to_page(pfn);
			struct page_ext *page_ext;

			if (page_zone(page) != zone)
				continue;

			/*
			 * To avoid having to grab zone->lock, be a little
			 * careful when reading buddy page order. The only
			 * danger is that we skip too much and potentially miss
			 * some early allocated pages, which is better than
			 * heavy lock contention.
			 */
			if (PageBuddy(page)) {
				unsigned long order = buddy_order_unsafe(page);

				if (order > 0 && order <= MAX_ORDER)
					pfn += (1UL << order) - 1;
				continue;
			}

			if (PageReserved(page))
				continue;

			page_ext = page_ext_get(page);
			if (unlikely(!page_ext))
				continue;

			/* Maybe overlapping zone */
			if (test_bit(PAGE_EXT_OWNER, &page_ext->flags))
				goto ext_put_continue;

			/* Found early allocated page */
			__set_page_owner_handle(page_ext, early_handle,
						0, 0);
			count++;
ext_put_continue:
			page_ext_put(page_ext);
		}
		cond_resched();
	}

	pr_info("Node %d, zone %8s: page owner found early allocated %lu pages\n",
		pgdat->node_id, zone->name, count);
}

static void init_zones_in_node(pg_data_t *pgdat)
{
	struct zone *zone;
	struct zone *node_zones = pgdat->node_zones;

	for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
		if (!populated_zone(zone))
			continue;

		init_pages_in_zone(pgdat, zone);
	}
}

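/* Walk every online node and scan its populated zones for early allocations. */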
static void init_early_allocated_pages(void)
{
	pg_data_t *pgdat;

	for_each_online_pgdat(pgdat)
		init_zones_in_node(pgdat);
}

static const struct file_operations proc_page_owner_operations = {
	.read		= read_page_owner,
	.llseek		= lseek_page_owner,
};

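/* Create /sys/kernel/debug/page_owner once page_owner has been initialized. */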
static int __init pageowner_init(void)
{
	if (!static_branch_unlikely(&page_owner_inited)) {
		pr_info("page_owner is disabled\n");
		return 0;
	}

	debugfs_create_file("page_owner", 0400, NULL, NULL,
			    &proc_page_owner_operations);

	return 0;
}
late_initcall(pageowner_init)