• Home
  • Raw
  • Download

Lines Matching +full:scrubber +full:- +full:done

1 // SPDX-License-Identifier: GPL-2.0-only
7 * hardware as being corrupted usually due to a multi-bit ECC memory or cache
11 * not-yet-corrupted-by-suspicious pages without killing anything.
23 * - You know how to test it.
24 * - You have a test that can be added to mce-test
25 * https://git.kernel.org/cgit/utils/cpu/mce/mce-test.git/
26 * - The case actually shows up as a frequent (top 10) page state in
27 * tools/mm/page-types when running a real workload.
41 #include <linux/page-flags.h>
50 #include <linux/backing-dev.h>
86 if (pfn != -1UL) in num_poisoned_pages_sub()
91 * MF_ATTR_RO - Create sysfs entry for each memory failure statistics.
100 &NODE_DATA(dev->id)->mf_stats; \
101 return sprintf(buf, "%lu\n", mf_stats->_name); \
183 * returns 0 for non-hugetlb pages as well. in page_handle_poison()
189 * acceptable because soft-offlined page is not broken in page_handle_poison()
228 if (mapping == NULL || mapping->host == NULL) in hwpoison_filter_dev()
229 return -EINVAL; in hwpoison_filter_dev()
231 dev = mapping->host->i_sb->s_dev; in hwpoison_filter_dev()
234 return -EINVAL; in hwpoison_filter_dev()
237 return -EINVAL; in hwpoison_filter_dev()
251 return -EINVAL; in hwpoison_filter_flags()
273 return -EINVAL; in hwpoison_filter_task()
287 return -EINVAL; in hwpoison_filter()
290 return -EINVAL; in hwpoison_filter()
293 return -EINVAL; in hwpoison_filter()
317 * from the VMAs. So do a brute-force search over all
342 struct task_struct *t = tk->tsk; in kill_proc()
343 short addr_lsb = tk->size_shift; in kill_proc()
347 pfn, t->comm, t->pid); in kill_proc()
351 (void __user *)tk->addr, addr_lsb); in kill_proc()
361 ret = send_sig_mceerr(BUS_MCEERR_AO, (void __user *)tk->addr, in kill_proc()
365 t->comm, t->pid, ret); in kill_proc()
378 * TODO: Could shrink slab caches here if a lightweight range-based in shake_page()
399 VM_BUG_ON_VMA(address == -EFAULT, vma); in dev_pagemap_mapping_shift()
400 pgd = pgd_offset(vma->vm_mm, address); in dev_pagemap_mapping_shift()
439 * memory_failure event. In all other cases, page->index and
440 * page->mapping are sufficient for mapping the page back to its
455 tk->addr = ksm_addr ? ksm_addr : page_address_in_vma(p, vma); in __add_to_kill()
458 tk->addr = vma_pgoff_address(fsdax_pgoff, 1, vma); in __add_to_kill()
459 tk->size_shift = dev_pagemap_mapping_shift(vma, tk->addr); in __add_to_kill()
461 tk->size_shift = page_shift(compound_head(p)); in __add_to_kill()
464 * Send SIGKILL if "tk->addr == -EFAULT". Also, as in __add_to_kill()
465 * "tk->size_shift" is always non-zero for !is_zone_device_page(), in __add_to_kill()
466 * so "tk->size_shift == 0" effectively checks no mapping on in __add_to_kill()
473 if (tk->addr == -EFAULT) { in __add_to_kill()
475 page_to_pfn(p), tsk->comm); in __add_to_kill()
476 } else if (tk->size_shift == 0) { in __add_to_kill()
482 tk->tsk = tsk; in __add_to_kill()
483 list_add_tail(&tk->nd, to_kill); in __add_to_kill()
500 if (tk->tsk == tsk) in task_in_to_kill_list()
534 if (fail || tk->addr == -EFAULT) { in kill_procs()
536 pfn, tk->tsk->comm, tk->tsk->pid); in kill_procs()
538 tk->tsk, PIDTYPE_PID); in kill_procs()
543 * something else on the address in-between. We could in kill_procs()
549 pfn, tk->tsk->comm, tk->tsk->pid); in kill_procs()
551 list_del(&tk->nd); in kill_procs()
552 put_task_struct(tk->tsk); in kill_procs()
570 if (t->flags & PF_MCE_PROCESS) { in find_early_kill_thread()
571 if (t->flags & PF_MCE_EARLY) in find_early_kill_thread()
595 if (!tsk->mm) in task_early_kill()
598 * Comparing ->mm here because current task might represent in task_early_kill()
601 if (force_early && tsk->mm == current->mm) in task_early_kill()
630 anon_vma_interval_tree_foreach(vmac, &av->rb_root, in collect_procs_anon()
632 vma = vmac->vma; in collect_procs_anon()
633 if (vma->vm_mm != t->mm) in collect_procs_anon()
652 struct address_space *mapping = folio->mapping; in collect_procs_file()
663 vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, in collect_procs_file()
672 if (vma->vm_mm == t->mm) in collect_procs_file()
705 vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) { in collect_procs_fsdax()
706 if (vma->vm_mm == t->mm) in collect_procs_fsdax()
721 if (!folio->mapping) in collect_procs()
739 tk->addr = addr; in set_to_kill()
740 tk->size_shift = shift; in set_to_kill()
775 if (pfn <= hwp->pfn && hwp->pfn < pfn + HPAGE_PMD_NR) { in check_hwpoisoned_pmd_entry()
776 hwpoison_vaddr = addr + ((hwp->pfn - pfn) << PAGE_SHIFT); in check_hwpoisoned_pmd_entry()
777 set_to_kill(&hwp->tk, hwpoison_vaddr, PAGE_SHIFT); in check_hwpoisoned_pmd_entry()
793 struct hwpoison_walk *hwp = walk->private; in hwpoison_pte_range()
798 ptl = pmd_trans_huge_lock(pmdp, walk->vma); in hwpoison_pte_range()
805 mapped_pte = ptep = pte_offset_map_lock(walk->vma->vm_mm, pmdp, in hwpoison_pte_range()
812 hwp->pfn, &hwp->tk); in hwpoison_pte_range()
827 struct hwpoison_walk *hwp = walk->private; in hwpoison_hugetlb_range()
829 struct hstate *h = hstate_vma(walk->vma); in hwpoison_hugetlb_range()
832 hwp->pfn, &hwp->tk); in hwpoison_hugetlb_range()
866 if (!p->mm) in kill_accessing_process()
867 return -EFAULT; in kill_accessing_process()
869 mmap_read_lock(p->mm); in kill_accessing_process()
870 ret = walk_page_range(p->mm, 0, TASK_SIZE, &hwpoison_walk_ops, in kill_accessing_process()
880 mmap_read_unlock(p->mm); in kill_accessing_process()
882 return ret > 0 ? -EHWPOISON : 0; in kill_accessing_process()
894 [MF_MSG_KERNEL_HIGH_ORDER] = "high-order kernel page",
926 * complain when the page is unpoison-and-freed. in delete_from_lru_cache()
943 return -EIO; in delete_from_lru_cache()
951 if (mapping->a_ops->error_remove_page) { in truncate_error_page()
953 int err = mapping->a_ops->error_remove_page(mapping, p); in truncate_error_page()
980 /* Callback ->action() has to unlock the relevant page inside it. */
993 int count = page_count(p) - 1; in has_extra_refcount()
996 count -= 1; in has_extra_refcount()
1000 page_to_pfn(p), action_page_types[ps->type], count); in has_extra_refcount()
1040 * For anonymous pages we're done the only reference left in me_pagecache_clean()
1066 * so is expected to have an extra refcount after error-handling. in me_pagecache_clean()
1131 mapping_set_error(mapping, -EIO); in me_pagecache_dirty()
1146 * - clear dirty bit to prevent IO
1147 * - remove from LRU
1148 * - but keep in the swap cache, so that when we return to it on
1196 * - Error on hugepage is contained in hugepage unit (not in raw page unit.)
1208 res = truncate_error_page(&folio->page, page_to_pfn(p), mapping); in me_huge_page()
1237 * A page state is defined by its current page->flags bits.
1311 mf_stats = &NODE_DATA(nid)->mf_stats; in update_per_node_mf_stats()
1314 ++mf_stats->ignored; in update_per_node_mf_stats()
1317 ++mf_stats->failed; in update_per_node_mf_stats()
1320 ++mf_stats->delayed; in update_per_node_mf_stats()
1323 ++mf_stats->recovered; in update_per_node_mf_stats()
1329 ++mf_stats->total; in update_per_node_mf_stats()
1348 return (result == MF_RECOVERED || result == MF_DELAYED) ? 0 : -EBUSY; in action_result()
1356 /* page p should be unlocked after returning from ps->action(). */ in page_action()
1357 result = ps->action(ps, p); in page_action()
1364 return action_result(pfn, ps->type, result); in page_action()
1391 /* Soft offline could migrate non-LRU movable pages */ in HWPoisonHandlable()
1420 if (!HWPoisonHandlable(&folio->page, flags)) in __get_hwpoison_page()
1421 return -EBUSY; in __get_hwpoison_page()
1450 ret = -EBUSY; in get_any_page()
1455 ret = -EIO; in get_any_page()
1458 } else if (ret == -EBUSY) { in get_any_page()
1467 ret = -EIO; in get_any_page()
1486 ret = -EIO; in get_any_page()
1489 if (ret == -EIO) in get_any_page()
1516 return -EHWPOISON; in __get_unpoison_page()
1522 * get_hwpoison_page() - Get refcount for memory error handling
1527 * error on it, after checking that the error page is in a well-defined state
1528 * (defined as a page-type we can successfully handle the memory error on it,
1534 * extra care for the error page's state (as done in __get_hwpoison_page()),
1542 * 1 on success for in-use pages in a well-defined state,
1543 * -EIO for pages on which we can not handle memory errors,
1544 * -EBUSY when get_hwpoison_page() has raced with page lifecycle
1546 * -EHWPOISON when the page is hwpoisoned and taken off from buddy.
1578 * Here we are interested only in user-mapped pages, so skip any in hwpoison_user_mappings()
1618 * mapped in dirty form. This has to be done before try_to_unmap, in hwpoison_user_mappings()
1655 * struct page and all unmaps done we can decide if in hwpoison_user_mappings()
1660 * use a more force-full uncatchable kill to prevent in hwpoison_user_mappings()
1681 if ((p->flags & ps->mask) == ps->res) in identify_page_state()
1684 page_flags |= (p->flags & (1UL << PG_dirty)); in identify_page_state()
1686 if (!ps->mask) in identify_page_state()
1688 if ((page_flags & ps->mask) == ps->res) in identify_page_state()
1714 if (tk->size_shift) in unmap_and_kill()
1715 size = max(size, 1UL << tk->size_shift); in unmap_and_kill()
1719 * Unmap the largest mapping to avoid breaking up device-dax in unmap_and_kill()
1724 loff_t start = ((loff_t)index << PAGE_SHIFT) & ~(size - 1); in unmap_and_kill()
1758 return -EBUSY; in mf_generic_kill_procs()
1760 if (hwpoison_filter(&folio->page)) { in mf_generic_kill_procs()
1761 rc = -EOPNOTSUPP; in mf_generic_kill_procs()
1765 switch (pgmap->type) { in mf_generic_kill_procs()
1770 * with device-side memory. in mf_generic_kill_procs()
1772 rc = -ENXIO; in mf_generic_kill_procs()
1782 SetPageHWPoison(&folio->page); in mf_generic_kill_procs()
1785 * Unlike System-RAM there is no possibility to swap in a in mf_generic_kill_procs()
1791 collect_procs(folio, &folio->page, &to_kill, true); in mf_generic_kill_procs()
1793 unmap_and_kill(&to_kill, pfn, folio->mapping, folio->index, flags); in mf_generic_kill_procs()
1801 * mf_dax_kill_procs - Collect and kill processes who are using this file range
1821 return -EBUSY; in mf_dax_kill_procs()
1842 * constructing singly linked list from ->_hugetlb_hwpoison field of folio.
1851 return (struct llist_head *)&folio->_hugetlb_hwpoison; in raw_hwp_list_head()
1877 llist_for_each_entry(p, raw_hwp_head->first, node) { in is_raw_hwpoison_page_in_hugepage()
1878 if (page == p->page) { in is_raw_hwpoison_page_in_hugepage()
1898 SetPageHWPoison(p->page); in __folio_free_raw_hwp()
1900 num_poisoned_pages_sub(page_to_pfn(p->page), 1); in __folio_free_raw_hwp()
1912 int ret = folio_test_set_hwpoison(folio) ? -EHWPOISON : 0; in folio_set_hugetlb_hwpoison()
1920 return -EHWPOISON; in folio_set_hugetlb_hwpoison()
1922 llist_for_each_entry_safe(p, next, head->first, node) { in folio_set_hugetlb_hwpoison()
1923 if (p->page == page) in folio_set_hugetlb_hwpoison()
1924 return -EHWPOISON; in folio_set_hugetlb_hwpoison()
1929 raw_hwp->page = page; in folio_set_hugetlb_hwpoison()
1930 llist_add(&raw_hwp->node, head); in folio_set_hugetlb_hwpoison()
1983 * 0 - free hugepage
1984 * 1 - in-use hugepage
1985 * 2 - not a hugepage
1986 * -EBUSY - the hugepage is busy (try to retry)
1987 * -EHWPOISON - the hugepage is already hwpoisoned
2010 ret = -EBUSY; in __get_huge_page_for_hwpoison()
2016 ret = -EHWPOISON; in __get_huge_page_for_hwpoison()
2040 * PageHWPoison) should be done in single hugetlb_lock range.
2056 } else if (res == -EHWPOISON) { in try_memory_failure_hugetlb()
2063 } else if (res == -EBUSY) { in try_memory_failure_hugetlb()
2081 return -EOPNOTSUPP; in try_memory_failure_hugetlb()
2099 page_flags = folio->flags; in try_memory_failure_hugetlb()
2101 if (!hwpoison_user_mappings(p, pfn, flags, &folio->page)) { in try_memory_failure_hugetlb()
2137 int rc = -ENXIO; in memory_failure_dev_pagemap()
2148 rc = pgmap->ops->memory_failure(pgmap, pfn, 1, flags); in memory_failure_dev_pagemap()
2153 if (rc != -EOPNOTSUPP) in memory_failure_dev_pagemap()
2161 if (rc != -EOPNOTSUPP) in memory_failure_dev_pagemap()
2167 * memory_failure - Handle memory failure of a page.
2178 * detected by a background scrubber)
2184 * -EOPNOTSUPP for hwpoison_filter() filtered the error event,
2185 * < 0(except -EOPNOTSUPP) on failure.
2221 res = -ENXIO; in memory_failure()
2232 res = -EHWPOISON; in memory_failure()
2244 * 2) it's part of a non-compound high order page. in memory_failure()
2302 * We ignore non-LRU pages for good reasons. in memory_failure()
2303 * - PG_locked is only well defined for LRU pages and a few others in memory_failure()
2304 * - to avoid races with __SetPageLocked() in memory_failure()
2305 * - to avoid races with __SetPageSlab*() (and more non-atomic ops) in memory_failure()
2314 * We're only intended to deal with the non-Compound page here. in memory_failure()
2339 page_flags = p->flags; in memory_failure()
2345 res = -EOPNOTSUPP; in memory_failure()
2375 if (PageLRU(p) && !PageSwapCache(p) && p->mapping == NULL) { in memory_failure()
2410 * memory_failure_queue - Schedule handling memory failure of a page.
2421 * detected by a background scrubber)
2436 raw_spin_lock_irqsave(&mf_cpu->lock, proc_flags); in memory_failure_queue()
2437 buffer_overflow = !kfifo_put(&mf_cpu->fifo, entry); in memory_failure_queue()
2439 schedule_work_on(smp_processor_id(), &mf_cpu->work); in memory_failure_queue()
2440 raw_spin_unlock_irqrestore(&mf_cpu->lock, proc_flags); in memory_failure_queue()
2457 raw_spin_lock_irqsave(&mf_cpu->lock, proc_flags); in memory_failure_work_func()
2458 gotten = kfifo_get(&mf_cpu->fifo, &entry); in memory_failure_work_func()
2459 raw_spin_unlock_irqrestore(&mf_cpu->lock, proc_flags); in memory_failure_work_func()
2471 * Used to avoid return-to-userspace racing with the memory_failure workqueue.
2478 cancel_work_sync(&mf_cpu->work); in memory_failure_queue_kick()
2479 memory_failure_work_func(&mf_cpu->work); in memory_failure_queue_kick()
2489 raw_spin_lock_init(&mf_cpu->lock); in memory_failure_init()
2490 INIT_KFIFO(mf_cpu->fifo); in memory_failure_init()
2491 INIT_WORK(&mf_cpu->work, memory_failure_work_func); in memory_failure_init()
2509 * unpoison_memory - Unpoison a previously poisoned page
2512 * Software-unpoison a page that has been poisoned by
2515 * This is only done on the software-level, so it only works
2518 * Returns 0 for success, otherwise -errno.
2524 int ret = -EBUSY, ghp; in unpoison_memory()
2531 return -ENXIO; in unpoison_memory()
2541 ret = -EOPNOTSUPP; in unpoison_memory()
2545 if (is_huge_zero_page(&folio->page)) { in unpoison_memory()
2548 ret = -EOPNOTSUPP; in unpoison_memory()
2564 if (folio_test_slab(folio) || PageTable(&folio->page) || in unpoison_memory()
2565 folio_test_reserved(folio) || PageOffline(&folio->page)) in unpoison_memory()
2569 * Note that folio->_mapcount is overloaded in SLAB, so the simple test in unpoison_memory()
2570 * in folio_mapped() has to be done after folio_test_slab() is checked. in unpoison_memory()
2579 unpoison_pr_info("Unpoison: the hwpoison page has non-NULL mapping %#lx\n", in unpoison_memory()
2592 ret = folio_test_clear_hwpoison(folio) ? 0 : -EBUSY; in unpoison_memory()
2594 if (ghp == -EHWPOISON) { in unpoison_memory()
2595 ret = put_page_back_buddy(p) ? 0 : -EBUSY; in unpoison_memory()
2623 unpoison_pr_info("Unpoison: Software-unpoisoned page %#lx\n", in unpoison_memory()
2646 list_add(&page->lru, pagelist); in isolate_page()
2665 * soft_offline_in_use_page handles hugetlb-pages and non-hugetlb pages.
2666 * If the page is a non-dirty unmapped page-cache page, it simply invalidates.
2685 return -EBUSY; in soft_offline_in_use_page()
2721 ret = -EBUSY; in soft_offline_in_use_page()
2727 pfn, msg_page[huge], ret, &page->flags); in soft_offline_in_use_page()
2729 ret = -EBUSY; in soft_offline_in_use_page()
2733 pfn, msg_page[huge], page_count(page), &page->flags); in soft_offline_in_use_page()
2734 ret = -EBUSY; in soft_offline_in_use_page()
2740 * soft_offline_page - Soft offline a page.
2741 * @pfn: pfn to soft-offline
2745 * -EOPNOTSUPP for hwpoison_filter() filtered the error event
2771 return -ENXIO; in soft_offline_page()
2774 /* Only online pages can be soft-offlined (esp., not ZONE_DEVICE). */ in soft_offline_page()
2778 return -EIO; in soft_offline_page()
2800 return -EOPNOTSUPP; in soft_offline_page()
2812 ret = -EBUSY; in soft_offline_page()