Lines Matching +full:scrubber +full:- +full:done
1 // SPDX-License-Identifier: GPL-2.0-only
7 * hardware as being corrupted usually due to a multi-bit ECC memory or cache
11 * not-yet-corrupted-by-suspicious pages without killing anything.
23 * - You know how to test it.
24 * - You have a test that can be added to mce-test
25 * https://git.kernel.org/cgit/utils/cpu/mce/mce-test.git/
26 * - The case actually shows up as a frequent (top 10) page state in
27 * tools/vm/page-types when running a real workload.
38 #include <linux/page-flags.h>
39 #include <linux/kernel-page-flags.h>
47 #include <linux/backing-dev.h>
58 #include <linux/page-isolation.h>
73 * returns 0 for non-hugetlb pages as well. in page_handle_poison()
79 * acceptable because soft-offlined page is not broken in page_handle_poison()
121 return -EINVAL; in hwpoison_filter_dev()
124 if (mapping == NULL || mapping->host == NULL) in hwpoison_filter_dev()
125 return -EINVAL; in hwpoison_filter_dev()
127 dev = mapping->host->i_sb->s_dev; in hwpoison_filter_dev()
130 return -EINVAL; in hwpoison_filter_dev()
133 return -EINVAL; in hwpoison_filter_dev()
147 return -EINVAL; in hwpoison_filter_flags()
169 return -EINVAL; in hwpoison_filter_task()
183 return -EINVAL; in hwpoison_filter()
186 return -EINVAL; in hwpoison_filter()
189 return -EINVAL; in hwpoison_filter()
213 * from the VMAs. So do a brute-force search over all
238 struct task_struct *t = tk->tsk; in kill_proc()
239 short addr_lsb = tk->size_shift; in kill_proc()
243 pfn, t->comm, t->pid); in kill_proc()
248 (void __user *)tk->addr, addr_lsb); in kill_proc()
256 ret = send_sig_mceerr(BUS_MCEERR_AO, (void __user *)tk->addr, in kill_proc()
261 t->comm, t->pid, ret); in kill_proc()
302 pgd = pgd_offset(vma->vm_mm, address); in dev_pagemap_mapping_shift()
347 tk->addr = page_address_in_vma(p, vma); in add_to_kill()
349 tk->size_shift = dev_pagemap_mapping_shift(p, vma); in add_to_kill()
351 tk->size_shift = page_shift(compound_head(p)); in add_to_kill()
354 * Send SIGKILL if "tk->addr == -EFAULT". Also, as in add_to_kill()
355 * "tk->size_shift" is always non-zero for !is_zone_device_page(), in add_to_kill()
356 * so "tk->size_shift == 0" effectively checks no mapping on in add_to_kill()
363 if (tk->addr == -EFAULT) { in add_to_kill()
365 page_to_pfn(p), tsk->comm); in add_to_kill()
366 } else if (tk->size_shift == 0) { in add_to_kill()
372 tk->tsk = tsk; in add_to_kill()
373 list_add_tail(&tk->nd, to_kill); in add_to_kill()
396 if (fail || tk->addr == -EFAULT) { in kill_procs()
398 pfn, tk->tsk->comm, tk->tsk->pid); in kill_procs()
400 tk->tsk, PIDTYPE_PID); in kill_procs()
405 * something else on the address in-between. We could in kill_procs()
411 pfn, tk->tsk->comm, tk->tsk->pid); in kill_procs()
413 put_task_struct(tk->tsk); in kill_procs()
431 if (t->flags & PF_MCE_PROCESS) { in find_early_kill_thread()
432 if (t->flags & PF_MCE_EARLY) in find_early_kill_thread()
454 if (!tsk->mm) in task_early_kill()
458 * Comparing ->mm here because current task might represent in task_early_kill()
461 if (tsk->mm == current->mm) in task_early_kill()
492 anon_vma_interval_tree_foreach(vmac, &av->rb_root, in collect_procs_anon()
494 vma = vmac->vma; in collect_procs_anon()
497 if (vma->vm_mm == t->mm) in collect_procs_anon()
513 struct address_space *mapping = page->mapping; in collect_procs_file()
524 vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, in collect_procs_file()
533 if (vma->vm_mm == t->mm) in collect_procs_file()
547 if (!page->mapping) in collect_procs()
565 [MF_MSG_KERNEL_HIGH_ORDER] = "high-order kernel page",
571 [MF_MSG_NON_PMD_HUGE] = "non-pmd-sized huge page",
600 * complain when the page is unpoison-and-freed. in delete_from_lru_cache()
617 return -EIO; in delete_from_lru_cache()
625 if (mapping->a_ops->error_remove_page) { in truncate_error_page()
626 int err = mapping->a_ops->error_remove_page(mapping, p); in truncate_error_page()
682 * For anonymous pages we're done the only reference left in me_pagecache_clean()
757 mapping_set_error(mapping, -EIO); in me_pagecache_dirty()
772 * - clear dirty bit to prevent IO
773 * - remove from LRU
774 * - but keep in the swap cache, so that when we return to it on
807 * - Error on hugepage is contained in hugepage unit (not in raw page unit.)
842 * A page state is defined by its current page->flags bits.
928 result = ps->action(p, pfn); in page_action()
930 count = page_count(p) - 1; in page_action()
931 if (ps->action == me_swapcache_dirty && result == MF_DELAYED) in page_action()
932 count--; in page_action()
935 pfn, action_page_types[ps->type], count); in page_action()
938 action_result(pfn, ps->type, result); in page_action()
945 return (result == MF_RECOVERED || result == MF_DELAYED) ? 0 : -EBUSY; in page_action()
949 * get_hwpoison_page() - Get refcount for memory error handling:
953 * non-zero value.)
1001 * Here we are interested only in user-mapped pages, so skip any in hwpoison_user_mappings()
1048 * mapped in dirty form. This has to be done before try_to_unmap, in hwpoison_user_mappings()
1052 * there's nothing that can be done. in hwpoison_user_mappings()
1094 * struct page and all unmaps done we can decide if in hwpoison_user_mappings()
1099 * use a more force-full uncatchable kill to prevent in hwpoison_user_mappings()
1119 if ((p->flags & ps->mask) == ps->res) in identify_page_state()
1122 page_flags |= (p->flags & (1UL << PG_dirty)); in identify_page_state()
1124 if (!ps->mask) in identify_page_state()
1126 if ((page_flags & ps->mask) == ps->res) in identify_page_state()
1143 return -EBUSY; in try_to_split_thp_page()
1160 return -EHWPOISON; in memory_failure_hugetlb()
1185 page_flags = head->flags; in memory_failure_hugetlb()
1196 * TODO: hwpoison for pud-sized hugetlb doesn't work right now, so in memory_failure_hugetlb()
1199 * - conversion of a pud that maps an error hugetlb into hwpoison in memory_failure_hugetlb()
1201 * - other mm code walking over page table is aware of pud-aligned in memory_failure_hugetlb()
1206 res = -EBUSY; in memory_failure_hugetlb()
1212 res = -EBUSY; in memory_failure_hugetlb()
1230 int rc = -EBUSY; in memory_failure_dev_pagemap()
1242 rc = -ENXIO; in memory_failure_dev_pagemap()
1262 if (pgmap->type == MEMORY_DEVICE_PRIVATE) { in memory_failure_dev_pagemap()
1265 * with device-side memory. in memory_failure_dev_pagemap()
1277 * Unlike System-RAM there is no possibility to swap in a in memory_failure_dev_pagemap()
1286 if (tk->size_shift) in memory_failure_dev_pagemap()
1287 size = max(size, 1UL << tk->size_shift); in memory_failure_dev_pagemap()
1291 * device-dax mappings which are constant size. The in memory_failure_dev_pagemap()
1295 start = (page->index << PAGE_SHIFT) & ~(size - 1); in memory_failure_dev_pagemap()
1296 unmap_mapping_range(page->mapping, start, size, 0); in memory_failure_dev_pagemap()
1310 * memory_failure - Handle memory failure of a page.
1321 * detected by a background scrubber)
1349 return -ENXIO; in memory_failure()
1362 res = -EHWPOISON; in memory_failure()
1373 * 2) it's part of a non-compound high order page. in memory_failure()
1385 res = -EBUSY; in memory_failure()
1393 res = -EBUSY; in memory_failure()
1400 * We ignore non-LRU pages for good reasons. in memory_failure()
1401 * - PG_locked is only well defined for LRU pages and a few others in memory_failure()
1402 * - to avoid races with __SetPageLocked() in memory_failure()
1403 * - to avoid races with __SetPageSlab*() (and more non-atomic ops) in memory_failure()
1425 res = -EBUSY; in memory_failure()
1436 page_flags = p->flags; in memory_failure()
1476 res = -EBUSY; in memory_failure()
1483 if (PageLRU(p) && !PageSwapCache(p) && p->mapping == NULL) { in memory_failure()
1485 res = -EBUSY; in memory_failure()
1517 * memory_failure_queue - Schedule handling memory failure of a page.
1528 * detected by a background scrubber)
1542 spin_lock_irqsave(&mf_cpu->lock, proc_flags); in memory_failure_queue()
1543 if (kfifo_put(&mf_cpu->fifo, entry)) in memory_failure_queue()
1544 schedule_work_on(smp_processor_id(), &mf_cpu->work); in memory_failure_queue()
1548 spin_unlock_irqrestore(&mf_cpu->lock, proc_flags); in memory_failure_queue()
1562 spin_lock_irqsave(&mf_cpu->lock, proc_flags); in memory_failure_work_func()
1563 gotten = kfifo_get(&mf_cpu->fifo, &entry); in memory_failure_work_func()
1564 spin_unlock_irqrestore(&mf_cpu->lock, proc_flags); in memory_failure_work_func()
1576 * Used to avoid return-to-userspace racing with the memory_failure workqueue.
1583 cancel_work_sync(&mf_cpu->work); in memory_failure_queue_kick()
1584 memory_failure_work_func(&mf_cpu->work); in memory_failure_queue_kick()
1594 spin_lock_init(&mf_cpu->lock); in memory_failure_init()
1595 INIT_KFIFO(mf_cpu->fifo); in memory_failure_init()
1596 INIT_WORK(&mf_cpu->work, memory_failure_work_func); in memory_failure_init()
1610 * unpoison_memory - Unpoison a previously poisoned page
1613 * Software-unpoison a page that has been poisoned by
1616 * This is only done on the software-level, so it only works
1619 * Returns 0 for success, otherwise -errno.
1630 return -ENXIO; in unpoison_memory()
1654 unpoison_pr_info("Unpoison: the hwpoison page has non-NULL mapping %#lx\n", in unpoison_memory()
1673 unpoison_pr_info("Unpoison: Software-unpoisoned free page %#lx\n", in unpoison_memory()
1686 unpoison_pr_info("Unpoison: Software-unpoisoned page %#lx\n", in unpoison_memory()
1703 * Returns 0 for a free page, 1 for an in-use page, -EIO for a page-type we
1704 * cannot handle and -EBUSY if we raced with an allocation.
1705 * We only incremented refcount in case the page was already in-use and it is
1722 ret = -EBUSY; in get_any_page()
1727 ret = -EIO; in get_any_page()
1744 ret = -EIO; in get_any_page()
1765 list_add(&page->lru, pagelist); in isolate_page()
1784 * __soft_offline_page handles hugetlb-pages and non-hugetlb pages.
1785 * If the page is a non-dirty unmapped page-cache page, it simply invalidates.
1804 * memory_failure() also double-checks PageHWPoison inside page lock, in __soft_offline_page()
1826 * RED-PEN would be better to keep it isolated here, but we in __soft_offline_page()
1842 ret = -EBUSY; in __soft_offline_page()
1848 pfn, msg_page[huge], ret, page->flags, &page->flags); in __soft_offline_page()
1850 ret = -EBUSY; in __soft_offline_page()
1854 pfn, msg_page[huge], page_count(page), page->flags, &page->flags); in __soft_offline_page()
1855 ret = -EBUSY; in __soft_offline_page()
1866 return -EBUSY; in soft_offline_in_use_page()
1877 * soft_offline_page - Soft offline a page.
1878 * @pfn: pfn to soft-offline
1907 return -ENXIO; in soft_offline_page()
1911 /* Only online pages can be soft-offlined (esp., not ZONE_DEVICE). */ in soft_offline_page()
1915 return -EIO; in soft_offline_page()
1938 ret = -EBUSY; in soft_offline_page()
1940 } else if (ret == -EIO) { in soft_offline_page()
1942 __func__, pfn, page->flags, &page->flags); in soft_offline_page()