Lines matching "processor-intensive"

1 // SPDX-License-Identifier: GPL-2.0
34 #include <linux/backing-dev.h>
77 if ((_page)->lru.prev != _base) { \
80 prev = lru_to_page(&(_page->lru)); \
81 prefetchw(&prev->_field); \
101 WARN_ON_ONCE(rs && task->reclaim_state); in set_task_reclaim_state()
103 /* Check for the nulling of an already-nulled member */ in set_task_reclaim_state()
104 WARN_ON_ONCE(!rs && !task->reclaim_state); in set_task_reclaim_state()
106 task->reclaim_state = rs; in set_task_reclaim_state()
119 int id, ret = -ENOMEM; in prealloc_memcg_shrinker()
135 shrinker->id = id; in prealloc_memcg_shrinker()
144 int id = shrinker->id; in unregister_memcg_shrinker()
155 return sc->target_mem_cgroup; in cgroup_reclaim()
159 * writeback_throttling_sane - is the usual dirty throttling mechanism available?
221 * lruvec_lru_size - Returns the number of pages on the given LRU list.
234 struct zone *zone = &lruvec_pgdat(lruvec)->node_zones[zid]; in lruvec_lru_size()
246 struct zone *zone = &lruvec_pgdat(lruvec)->node_zones[zid]; in lruvec_lru_size()
264 unsigned int size = sizeof(*shrinker->nr_deferred); in prealloc_shrinker()
266 if (shrinker->flags & SHRINKER_NUMA_AWARE) in prealloc_shrinker()
269 shrinker->nr_deferred = kzalloc(size, GFP_KERNEL); in prealloc_shrinker()
270 if (!shrinker->nr_deferred) in prealloc_shrinker()
271 return -ENOMEM; in prealloc_shrinker()
273 if (shrinker->flags & SHRINKER_MEMCG_AWARE) { in prealloc_shrinker()
281 kfree(shrinker->nr_deferred); in prealloc_shrinker()
282 shrinker->nr_deferred = NULL; in prealloc_shrinker()
283 return -ENOMEM; in prealloc_shrinker()
288 if (!shrinker->nr_deferred) in free_prealloced_shrinker()
291 if (shrinker->flags & SHRINKER_MEMCG_AWARE) { in free_prealloced_shrinker()
297 kfree(shrinker->nr_deferred); in free_prealloced_shrinker()
298 shrinker->nr_deferred = NULL; in free_prealloced_shrinker()
304 list_add_tail(&shrinker->list, &shrinker_list); in register_shrinker_prepared()
305 shrinker->flags |= SHRINKER_REGISTERED; in register_shrinker_prepared()
325 if (!(shrinker->flags & SHRINKER_REGISTERED)) in unregister_shrinker()
329 list_del(&shrinker->list); in unregister_shrinker()
330 shrinker->flags &= ~SHRINKER_REGISTERED; in unregister_shrinker()
331 if (shrinker->flags & SHRINKER_MEMCG_AWARE) in unregister_shrinker()
335 kfree(shrinker->nr_deferred); in unregister_shrinker()
336 shrinker->nr_deferred = NULL; in unregister_shrinker()
351 int nid = shrinkctl->nid; in do_shrink_slab()
352 long batch_size = shrinker->batch ? shrinker->batch in do_shrink_slab()
356 if (!(shrinker->flags & SHRINKER_NUMA_AWARE)) in do_shrink_slab()
359 freeable = shrinker->count_objects(shrinker, shrinkctl); in do_shrink_slab()
368 nr = atomic_long_xchg(&shrinker->nr_deferred[nid], 0); in do_shrink_slab()
371 if (shrinker->seeks) { in do_shrink_slab()
374 do_div(delta, shrinker->seeks); in do_shrink_slab()
387 shrinker->scan_objects, total_scan); in do_shrink_slab()
396 * shrinkers to return -1 all the time. This results in a large in do_shrink_slab()
439 shrinkctl->nr_to_scan = nr_to_scan; in do_shrink_slab()
440 shrinkctl->nr_scanned = nr_to_scan; in do_shrink_slab()
441 ret = shrinker->scan_objects(shrinker, shrinkctl); in do_shrink_slab()
446 count_vm_events(SLABS_SCANNED, shrinkctl->nr_scanned); in do_shrink_slab()
447 total_scan -= shrinkctl->nr_scanned; in do_shrink_slab()
448 scanned += shrinkctl->nr_scanned; in do_shrink_slab()
454 next_deferred -= scanned; in do_shrink_slab()
464 &shrinker->nr_deferred[nid]); in do_shrink_slab()
466 new_nr = atomic_long_read(&shrinker->nr_deferred[nid]); in do_shrink_slab()
486 map = rcu_dereference_protected(memcg->nodeinfo[nid]->shrinker_map, in shrink_slab_memcg()
491 for_each_set_bit(i, map->map, shrinker_nr_max) { in shrink_slab_memcg()
500 if (unlikely(!shrinker || !(shrinker->flags & SHRINKER_REGISTERED))) { in shrink_slab_memcg()
502 clear_bit(i, map->map); in shrink_slab_memcg()
506 /* Call non-slab shrinkers even though kmem is disabled */ in shrink_slab_memcg()
508 !(shrinker->flags & SHRINKER_NONSLAB)) in shrink_slab_memcg()
513 clear_bit(i, map->map); in shrink_slab_memcg()
556 * shrink_slab - shrink slab caches
570 * @priority is sc->priority, we take the number of objects and >> by priority
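The fragment at line 570 says the slab scan target is the object count shifted right by sc->priority. A minimal userspace sketch of that proportionality, assuming the delta * 4 / seeks scaling suggested by the do_shrink_slab() fragments above (the scan_target name, the *4 factor and the sample numbers are mine, not the kernel's):

#include <stdio.h>

/* Rough model: more freeable objects and lower priority => bigger scan. */
static unsigned long scan_target(unsigned long freeable, int priority, int seeks)
{
        unsigned long delta = freeable >> priority;   /* proportional share */

        delta = delta * 4 / seeks;                    /* assumed cost scaling */
        return delta;
}

int main(void)
{
        /* e.g. ~1M freeable objects, default priority 12, seeks == 2 */
        printf("%lu\n", scan_target(1UL << 20, 12, 2));
        return 0;
}
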
661 * heads at page->private. in is_page_cache_freeable()
664 return page_count(page) - page_has_private(page) == 1 + page_cache_pins; in is_page_cache_freeable()
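The comparison at line 664 treats a page as freeable only when its reference count is fully explained by the expected pins: the isolating caller, the page cache (one pin per base page), and optionally buffer heads at page->private. A standalone model of that accounting (the helper name and the boolean stand-in for page_has_private() are mine):

#include <stdbool.h>
#include <stdio.h>

static bool is_freeable(int refcount, int nr_base_pages, bool has_buffers)
{
        int cache_pins = nr_base_pages;   /* page cache references */

        /* 1 for the isolating caller, plus cache pins, plus buffers if any */
        return refcount - (has_buffers ? 1 : 0) == 1 + cache_pins;
}

int main(void)
{
        /* order-0 page, refcount 2 (caller + page cache), no buffer heads */
        printf("%d\n", is_freeable(2, 1, false));
        return 0;
}
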
669 if (current->flags & PF_SWAPWRITE) in may_write_to_inode()
673 if (inode_to_bdi(inode) == current->backing_dev_info) in may_write_to_inode()
680 * -ENOSPC. We need to propagate that into the address_space for a subsequent
713 * Calls ->writepage().
719 * will be non-blocking. To prevent this allocation from being in pageout()
738 * page->mapping == NULL while being dirty with clean buffers. in pageout()
749 if (mapping->a_ops->writepage == NULL) in pageout()
751 if (!may_write_to_inode(mapping->host)) in pageout()
765 res = mapping->a_ops->writepage(page, &wbc); in pageout()
799 xa_lock_irqsave(&mapping->i_pages, flags); in __remove_mapping()
819 * escape unnoticed. The smp_rmb is needed to ensure the page->flags in __remove_mapping()
820 * load is not satisfied before that of page->_refcount. in __remove_mapping()
840 xa_unlock_irqrestore(&mapping->i_pages, flags); in __remove_mapping()
845 freepage = mapping->a_ops->freepage; in __remove_mapping()
866 xa_unlock_irqrestore(&mapping->i_pages, flags); in __remove_mapping()
875 xa_unlock_irqrestore(&mapping->i_pages, flags); in __remove_mapping()
880 * Attempt to detach a locked page from its ->mapping. If it is dirty or if
900 * putback_lru_page - put previously isolated page onto appropriate LRU list
928 referenced_ptes = page_referenced(page, 1, sc->target_mem_cgroup, in page_check_references()
964 * Activate file-backed executable pages after first usage. in page_check_references()
1005 if (mapping && mapping->a_ops->is_dirty_writeback) in page_check_dirty_writeback()
1006 mapping->a_ops->is_dirty_writeback(page, dirty, writeback); in page_check_dirty_writeback()
1036 list_del(&page->lru); in shrink_page_list()
1046 sc->nr_scanned += nr_pages; in shrink_page_list()
1051 if (!sc->may_unmap && page_mapped(page)) in shrink_page_list()
1054 may_enter_fs = (sc->gfp_mask & __GFP_FS) || in shrink_page_list()
1055 (PageSwapCache(page) && (sc->gfp_mask & __GFP_IO)); in shrink_page_list()
1065 stat->nr_dirty++; in shrink_page_list()
1068 stat->nr_unqueued_dirty++; in shrink_page_list()
1078 inode_write_congested(mapping->host)) || in shrink_page_list()
1080 stat->nr_congested++; in shrink_page_list()
1128 test_bit(PGDAT_WRITEBACK, &pgdat->flags)) { in shrink_page_list()
1129 stat->nr_immediate++; in shrink_page_list()
1136 * This is slightly racy - end_page_writeback() in shrink_page_list()
1139 * as PageReadahead - but that does not matter in shrink_page_list()
1147 stat->nr_writeback++; in shrink_page_list()
1155 list_add_tail(&page->lru, page_list); in shrink_page_list()
1167 stat->nr_ref_keep += nr_pages; in shrink_page_list()
1182 if (!(sc->gfp_mask & __GFP_IO)) in shrink_page_list()
1233 sc->nr_scanned -= (nr_pages - 1); in shrink_page_list()
1249 stat->nr_unmap_fail += nr_pages; in shrink_page_list()
1251 stat->nr_lazyfree_fail += nr_pages; in shrink_page_list()
1260 * injecting inefficient single-page IO into in shrink_page_list()
1269 !test_bit(PGDAT_DIRTY, &pgdat->flags))) { in shrink_page_list()
1286 if (!sc->may_writepage) in shrink_page_list()
1301 stat->nr_pageout += thp_nr_pages(page); in shrink_page_list()
1309 * A synchronous write - probably a ramdisk. Go in shrink_page_list()
1334 * drop the buffers and mark the page clean - it can be freed. in shrink_page_list()
1336 * Rarely, pages can have buffers and no ->mapping. These are in shrink_page_list()
1344 if (!try_to_release_page(page, sc->gfp_mask)) in shrink_page_list()
1378 sc->target_mem_cgroup)) in shrink_page_list()
1396 list_add(&page->lru, &free_pages); in shrink_page_list()
1405 sc->nr_scanned -= (nr_pages - 1); in shrink_page_list()
1417 stat->nr_activate[type] += nr_pages; in shrink_page_list()
1423 list_add(&page->lru, &ret_pages); in shrink_page_list()
1427 pgactivate = stat->nr_activate[0] + stat->nr_activate[1]; in shrink_page_list()
1456 list_move(&page->lru, &clean_pages); in reclaim_clean_pages_from_list()
1460 nr_reclaimed = shrink_page_list(&clean_pages, zone->zone_pgdat, &sc, in reclaim_clean_pages_from_list()
1463 mod_node_page_state(zone->zone_pgdat, NR_ISOLATED_FILE, in reclaim_clean_pages_from_list()
1464 -(long)nr_reclaimed); in reclaim_clean_pages_from_list()
1471 mod_node_page_state(zone->zone_pgdat, NR_ISOLATED_ANON, in reclaim_clean_pages_from_list()
1473 mod_node_page_state(zone->zone_pgdat, NR_ISOLATED_FILE, in reclaim_clean_pages_from_list()
1474 -(long)stat.nr_lazyfree_fail); in reclaim_clean_pages_from_list()
1486 * returns 0 on success, -ve errno on failure.
1490 int ret = -EINVAL; in __isolate_lru_page()
1500 ret = -EBUSY; in __isolate_lru_page()
1505 * blocking - clean pages for the most part. in __isolate_lru_page()
1521 * ->migratepage callback are possible to migrate in __isolate_lru_page()
1532 migrate_dirty = !mapping || mapping->a_ops->migratepage; in __isolate_lru_page()
1545 * sure the page is not being freed elsewhere -- the in __isolate_lru_page()
1569 update_lru_size(lruvec, lru, zid, -nr_zone_taken[zid]); in update_lru_sizes()
1575 * pgdat->lru_lock is heavily contended. Some of the functions that
1579 * For pagecache intensive workloads, this function is the hottest
1598 struct list_head *src = &lruvec->lists[lru]; in isolate_lru_pages()
1605 isolate_mode_t mode = (sc->may_unmap ? 0 : ISOLATE_UNMAPPED); in isolate_lru_pages()
1620 if (page_zonenum(page) > sc->reclaim_idx) { in isolate_lru_pages()
1621 list_move(&page->lru, &pages_skipped); in isolate_lru_pages()
1633 * premature OOM since __isolate_lru_page() returns -EBUSY in isolate_lru_pages()
1641 list_move(&page->lru, dst); in isolate_lru_pages()
1644 case -EBUSY: in isolate_lru_pages()
1646 list_move(&page->lru, src); in isolate_lru_pages()
1674 trace_mm_vmscan_lru_isolate(sc->reclaim_idx, sc->order, nr_to_scan, in isolate_lru_pages()
1681 * isolate_lru_page - tries to isolate a page from its LRU list
1688 * Returns -EBUSY if the page was not on an LRU list.
1708 int ret = -EBUSY; in isolate_lru_page()
1717 spin_lock_irq(&pgdat->lru_lock); in isolate_lru_page()
1726 spin_unlock_irq(&pgdat->lru_lock); in isolate_lru_page()
1759 * won't get blocked by normal direct-reclaimers, forming a circular in too_many_isolated()
1762 if ((sc->gfp_mask & (__GFP_IO | __GFP_FS)) == (__GFP_IO | __GFP_FS)) in too_many_isolated()
1779 * It is safe to rely on PG_active against the non-LRU pages in here because
1780 * nobody will play with that bit on a non-LRU page.
1782 * The downside is that we have to touch page->_refcount against each page.
1783 * But we had to alter page->flags anyway.
1804 list_del(&page->lru); in move_pages_to_lru()
1805 spin_unlock_irq(&pgdat->lru_lock); in move_pages_to_lru()
1807 spin_lock_irq(&pgdat->lru_lock); in move_pages_to_lru()
1817 list_move(&page->lru, &lruvec->lists[lru]); in move_pages_to_lru()
1825 spin_unlock_irq(&pgdat->lru_lock); in move_pages_to_lru()
1827 spin_lock_irq(&pgdat->lru_lock); in move_pages_to_lru()
1829 list_add(&page->lru, &pages_to_free); in move_pages_to_lru()
1861 * If a kernel thread (such as nfsd for loop-back mounts) services
1868 return !(current->flags & PF_LOCAL_THROTTLE) || in current_may_throttle()
1869 current->backing_dev_info == NULL || in current_may_throttle()
1870 bdi_write_congested(current->backing_dev_info); in current_may_throttle()
1895 sc->isolate_count++; in shrink_inactive_list()
1908 spin_lock_irq(&pgdat->lru_lock); in shrink_inactive_list()
1920 spin_unlock_irq(&pgdat->lru_lock); in shrink_inactive_list()
1927 spin_lock_irq(&pgdat->lru_lock); in shrink_inactive_list()
1931 __mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, -nr_taken); in shrink_inactive_list()
1948 spin_unlock_irq(&pgdat->lru_lock); in shrink_inactive_list()
1967 sc->nr.dirty += stat.nr_dirty; in shrink_inactive_list()
1968 sc->nr.congested += stat.nr_congested; in shrink_inactive_list()
1969 sc->nr.unqueued_dirty += stat.nr_unqueued_dirty; in shrink_inactive_list()
1970 sc->nr.writeback += stat.nr_writeback; in shrink_inactive_list()
1971 sc->nr.immediate += stat.nr_immediate; in shrink_inactive_list()
1972 sc->nr.taken += nr_taken; in shrink_inactive_list()
1974 sc->nr.file_taken += nr_taken; in shrink_inactive_list()
1976 trace_mm_vmscan_lru_shrink_inactive(pgdat->node_id, in shrink_inactive_list()
1977 nr_scanned, nr_reclaimed, &stat, sc->priority, file); in shrink_inactive_list()
2000 spin_lock_irq(&pgdat->lru_lock); in shrink_active_list()
2011 spin_unlock_irq(&pgdat->lru_lock); in shrink_active_list()
2016 list_del(&page->lru); in shrink_active_list()
2031 if (page_referenced(page, 0, sc->target_mem_cgroup, in shrink_active_list()
2034 * Identify referenced, file-backed active pages and in shrink_active_list()
2038 * are not likely to be evicted by use-once streaming in shrink_active_list()
2044 list_add(&page->lru, &l_active); in shrink_active_list()
2049 ClearPageActive(page); /* we are de-activating */ in shrink_active_list()
2051 list_add(&page->lru, &l_inactive); in shrink_active_list()
2057 spin_lock_irq(&pgdat->lru_lock); in shrink_active_list()
2067 __mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, -nr_taken); in shrink_active_list()
2068 spin_unlock_irq(&pgdat->lru_lock); in shrink_active_list()
2072 trace_mm_vmscan_lru_shrink_active(pgdat->node_id, nr_taken, nr_activate, in shrink_active_list()
2073 nr_deactivate, nr_rotated, sc->priority, file); in shrink_active_list()
2100 list_move(&page->lru, &node_page_list); in reclaim_pages()
2109 list_del(&page->lru); in reclaim_pages()
2122 list_del(&page->lru); in reclaim_pages()
2141 if (sc->may_deactivate & (1 << is_file_lru(lru))) in shrink_list()
2144 sc->skipped_deactivate = 1; in shrink_list()
2165 * to the established workingset on the scan-resistant active list,
2179 * -------------------------------------
2198 gb = (inactive + active) >> (30 - PAGE_SHIFT); in inactive_is_low()
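Line 2198 converts the combined LRU size to gigabytes; my understanding is that this feeds a target inactive:active ratio of roughly sqrt(10 * gb), so larger LRUs keep relatively smaller inactive lists. A userspace sketch under that assumption (the naive int_sqrt helper and the 4 KiB page size are mine):

#include <stdio.h>

static unsigned long int_sqrt(unsigned long x)
{
        unsigned long r = 0;

        while ((r + 1) * (r + 1) <= x)
                r++;
        return r;
}

static int inactive_is_low(unsigned long inactive, unsigned long active)
{
        unsigned long gb = (inactive + active) >> (30 - 12);   /* pages -> GiB */
        unsigned long ratio = gb ? int_sqrt(10 * gb) : 1;

        return inactive * ratio < active;
}

int main(void)
{
        /* 4 GiB LRU split 1:3 inactive:active -> not considered "low" yet */
        printf("%d\n", inactive_is_low(1UL << 18, 3UL << 18));
        return 0;
}
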
2230 if (!sc->may_swap || mem_cgroup_get_nr_swap_pages(memcg) <= 0) { in get_scan_count()
2252 if (!sc->priority && swappiness) { in get_scan_count()
2258 * If the system is almost out of file pages, force-scan anon. in get_scan_count()
2260 if (sc->file_is_tiny) { in get_scan_count()
2269 if (sc->cache_trim_mode) { in get_scan_count()
2290 total_cost = sc->anon_cost + sc->file_cost; in get_scan_count()
2291 anon_cost = total_cost + sc->anon_cost; in get_scan_count()
2292 file_cost = total_cost + sc->file_cost; in get_scan_count()
2298 fp = (200 - swappiness) * (total_cost + 1); in get_scan_count()
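Lines 2290-2298 split reclaim pressure between the anon and file LRUs by weighting swappiness against each list's recent relative reclaim cost. A simplified model of just that arithmetic (memcg protection and priority scaling from the surrounding code are omitted; the function name and sample costs are mine):

#include <stdio.h>

static void scan_fractions(unsigned long anon_cost, unsigned long file_cost,
                           int swappiness,
                           unsigned long *ap, unsigned long *fp)
{
        unsigned long total = anon_cost + file_cost;

        /* Higher recent cost for a list => smaller share of the scan. */
        *ap = (unsigned long)swappiness * (total + 1) / (total + anon_cost + 1);
        *fp = (200UL - swappiness) * (total + 1) / (total + file_cost + 1);
}

int main(void)
{
        unsigned long ap, fp;

        scan_fractions(100, 900, 60, &ap, &fp);   /* file reclaim has been costly */
        printf("anon %lu : file %lu\n", ap, fp);
        return 0;
}
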
2311 lruvec_size = lruvec_lru_size(lruvec, lru, sc->reclaim_idx); in get_scan_count()
2312 mem_cgroup_protection(sc->target_mem_cgroup, memcg, in get_scan_count()
2322 * becomes extremely binary -- from nothing as we in get_scan_count()
2337 * the best-effort low protection. However, we still in get_scan_count()
2338 * ideally want to honor how well-behaved groups are in in get_scan_count()
2349 if (!sc->memcg_low_reclaim && low > min) { in get_scan_count()
2351 sc->memcg_low_skipped = 1; in get_scan_count()
2359 scan = lruvec_size - lruvec_size * protection / in get_scan_count()
2365 * sc->priority further than desirable. in get_scan_count()
2372 scan >>= sc->priority; in get_scan_count()
2391 * round-off error. in get_scan_count()
2420 unsigned long nr_to_reclaim = sc->nr_to_reclaim; in shrink_lruvec()
2441 sc->priority == DEF_PRIORITY); in shrink_lruvec()
2452 nr[lru] -= nr_to_scan; in shrink_lruvec()
2504 nr_scanned = targets[lru] - nr[lru]; in shrink_lruvec()
2505 nr[lru] = targets[lru] * (100 - percentage) / 100; in shrink_lruvec()
2506 nr[lru] -= min(nr[lru], nr_scanned); in shrink_lruvec()
2509 nr_scanned = targets[lru] - nr[lru]; in shrink_lruvec()
2510 nr[lru] = targets[lru] * (100 - percentage) / 100; in shrink_lruvec()
2511 nr[lru] -= min(nr[lru], nr_scanned); in shrink_lruvec()
2516 sc->nr_reclaimed += nr_reclaimed; in shrink_lruvec()
2531 if (IS_ENABLED(CONFIG_COMPACTION) && sc->order && in in_reclaim_compaction()
2532 (sc->order > PAGE_ALLOC_COSTLY_ORDER || in in_reclaim_compaction()
2533 sc->priority < DEF_PRIORITY - 2)) in in_reclaim_compaction()
2540 * Reclaim/compaction is used for high-order allocation requests. It reclaims
2541 * order-0 pages before compacting the zone. should_continue_reclaim() returns
2564 * first, by assuming that zero delta of sc->nr_scanned means full LRU in should_continue_reclaim()
2566 * where always a non-zero amount of pages were scanned. in should_continue_reclaim()
2572 for (z = 0; z <= sc->reclaim_idx; z++) { in should_continue_reclaim()
2573 struct zone *zone = &pgdat->node_zones[z]; in should_continue_reclaim()
2577 switch (compaction_suitable(zone, sc->order, 0, sc->reclaim_idx)) { in should_continue_reclaim()
2591 pages_for_compaction = compact_gap(sc->order); in should_continue_reclaim()
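Line 2591 sizes the stop condition with compact_gap(); as I understand it that gap is roughly twice the allocation size, so reclaim continues until compaction has enough free order-0 pages to migrate into. A throwaway sketch under that assumption (names and sample values are mine):

#include <stdbool.h>
#include <stdio.h>

static unsigned long compact_gap(unsigned int order)
{
        return 2UL << order;   /* assumption: migration needs ~2x the request */
}

static bool keep_reclaiming(unsigned long reclaimable_pages, unsigned int order)
{
        return reclaimable_pages > compact_gap(order);
}

int main(void)
{
        printf("gap for order-4: %lu, keep going: %d\n",
               compact_gap(4), keep_reclaiming(100, 4));
        return 0;
}
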
2602 struct mem_cgroup *target_memcg = sc->target_mem_cgroup; in shrink_node_memcgs()
2612 * This loop can become CPU-bound when target memcgs in shrink_node_memcgs()
2613 * aren't eligible for reclaim - either because they in shrink_node_memcgs()
2634 if (!sc->memcg_low_reclaim) { in shrink_node_memcgs()
2635 sc->memcg_low_skipped = 1; in shrink_node_memcgs()
2641 reclaimed = sc->nr_reclaimed; in shrink_node_memcgs()
2642 scanned = sc->nr_scanned; in shrink_node_memcgs()
2646 shrink_slab(sc->gfp_mask, pgdat->node_id, memcg, in shrink_node_memcgs()
2647 sc->priority); in shrink_node_memcgs()
2650 vmpressure(sc->gfp_mask, memcg, false, in shrink_node_memcgs()
2651 sc->nr_scanned - scanned, in shrink_node_memcgs()
2652 sc->nr_reclaimed - reclaimed); in shrink_node_memcgs()
2659 struct reclaim_state *reclaim_state = current->reclaim_state; in shrink_node()
2665 target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat); in shrink_node()
2668 memset(&sc->nr, 0, sizeof(sc->nr)); in shrink_node()
2670 nr_reclaimed = sc->nr_reclaimed; in shrink_node()
2671 nr_scanned = sc->nr_scanned; in shrink_node()
2676 spin_lock_irq(&pgdat->lru_lock); in shrink_node()
2677 sc->anon_cost = target_lruvec->anon_cost; in shrink_node()
2678 sc->file_cost = target_lruvec->file_cost; in shrink_node()
2679 spin_unlock_irq(&pgdat->lru_lock); in shrink_node()
2685 if (!sc->force_deactivate) { in shrink_node()
2690 if (refaults != target_lruvec->refaults[0] || in shrink_node()
2692 sc->may_deactivate |= DEACTIVATE_ANON; in shrink_node()
2694 sc->may_deactivate &= ~DEACTIVATE_ANON; in shrink_node()
2703 if (refaults != target_lruvec->refaults[1] || in shrink_node()
2705 sc->may_deactivate |= DEACTIVATE_FILE; in shrink_node()
2707 sc->may_deactivate &= ~DEACTIVATE_FILE; in shrink_node()
2709 sc->may_deactivate = DEACTIVATE_ANON | DEACTIVATE_FILE; in shrink_node()
2717 if (file >> sc->priority && !(sc->may_deactivate & DEACTIVATE_FILE)) in shrink_node()
2718 sc->cache_trim_mode = 1; in shrink_node()
2720 sc->cache_trim_mode = 0; in shrink_node()
2736 free = sum_zone_node_page_state(pgdat->node_id, NR_FREE_PAGES); in shrink_node()
2741 struct zone *zone = &pgdat->node_zones[z]; in shrink_node()
2755 sc->file_is_tiny = in shrink_node()
2757 !(sc->may_deactivate & DEACTIVATE_ANON) && in shrink_node()
2758 anon >> sc->priority; in shrink_node()
2764 sc->nr_reclaimed += reclaim_state->reclaimed_slab; in shrink_node()
2765 reclaim_state->reclaimed_slab = 0; in shrink_node()
2769 vmpressure(sc->gfp_mask, sc->target_mem_cgroup, true, in shrink_node()
2770 sc->nr_scanned - nr_scanned, in shrink_node()
2771 sc->nr_reclaimed - nr_reclaimed); in shrink_node()
2773 if (sc->nr_reclaimed - nr_reclaimed) in shrink_node()
2779 * it implies that the long-lived page allocation rate in shrink_node()
2794 if (sc->nr.writeback && sc->nr.writeback == sc->nr.taken) in shrink_node()
2795 set_bit(PGDAT_WRITEBACK, &pgdat->flags); in shrink_node()
2798 if (sc->nr.unqueued_dirty == sc->nr.file_taken) in shrink_node()
2799 set_bit(PGDAT_DIRTY, &pgdat->flags); in shrink_node()
2807 if (sc->nr.immediate) in shrink_node()
2821 sc->nr.dirty && sc->nr.dirty == sc->nr.congested) in shrink_node()
2822 set_bit(LRUVEC_CONGESTED, &target_lruvec->flags); in shrink_node()
2831 !sc->hibernation_mode && in shrink_node()
2832 test_bit(LRUVEC_CONGESTED, &target_lruvec->flags)) in shrink_node()
2835 if (should_continue_reclaim(pgdat, sc->nr_reclaimed - nr_reclaimed, in shrink_node()
2846 pgdat->kswapd_failures = 0; in shrink_node()
2851 * Returns true if compaction should go ahead for a costly-order request, or
2860 suitable = compaction_suitable(zone, sc->order, 0, sc->reclaim_idx); in compaction_ready()
2877 watermark = high_wmark_pages(zone) + compact_gap(sc->order); in compaction_ready()
2879 return zone_watermark_ok_safe(zone, 0, watermark, sc->reclaim_idx); in compaction_ready()
2883 * This is the direct reclaim path, for page-allocating processes. We only
2904 orig_mask = sc->gfp_mask; in shrink_zones()
2906 sc->gfp_mask |= __GFP_HIGHMEM; in shrink_zones()
2907 sc->reclaim_idx = gfp_zone(sc->gfp_mask); in shrink_zones()
2911 sc->reclaim_idx, sc->nodemask) { in shrink_zones()
2925 * non-zero order, only frequent costly order in shrink_zones()
2931 sc->order > PAGE_ALLOC_COSTLY_ORDER && in shrink_zones()
2933 sc->compaction_ready = true; in shrink_zones()
2943 if (zone->zone_pgdat == last_pgdat) in shrink_zones()
2953 nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone->zone_pgdat, in shrink_zones()
2954 sc->order, sc->gfp_mask, in shrink_zones()
2956 sc->nr_reclaimed += nr_soft_reclaimed; in shrink_zones()
2957 sc->nr_scanned += nr_soft_scanned; in shrink_zones()
2962 if (zone->zone_pgdat == last_pgdat) in shrink_zones()
2964 last_pgdat = zone->zone_pgdat; in shrink_zones()
2966 shrink_node_hyperhold(zone->zone_pgdat, sc); in shrink_zones()
2968 shrink_node(zone->zone_pgdat, sc); in shrink_zones()
2976 sc->gfp_mask = orig_mask; in shrink_zones()
2988 lruvec->refaults[0] = lruvec_page_state(lruvec, WORKINGSET_ACTIVATE_ANON); /* modified */ in snapshot_refaults()
2989 lruvec->refaults[1] = lruvec_page_state(lruvec, WORKINGSET_ACTIVATE_FILE); /* modified */ in snapshot_refaults()
2994 target_lruvec->refaults[0] = refaults; in snapshot_refaults()
2996 target_lruvec->refaults[1] = refaults; in snapshot_refaults()
3006 * high - the zone may be full of dirty or under-writeback pages, which this
3018 int initial_priority = sc->priority; in do_try_to_free_pages()
3026 __count_zid_vm_events(ALLOCSTALL, sc->reclaim_idx, 1); in do_try_to_free_pages()
3029 vmpressure_prio(sc->gfp_mask, sc->target_mem_cgroup, in do_try_to_free_pages()
3030 sc->priority); in do_try_to_free_pages()
3031 sc->nr_scanned = 0; in do_try_to_free_pages()
3034 if (sc->nr_reclaimed >= sc->nr_to_reclaim) in do_try_to_free_pages()
3037 if (sc->compaction_ready) in do_try_to_free_pages()
3044 if (sc->priority < DEF_PRIORITY - 2) in do_try_to_free_pages()
3045 sc->may_writepage = 1; in do_try_to_free_pages()
3046 } while (--sc->priority >= 0); in do_try_to_free_pages()
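The do_try_to_free_pages() loop above retries at ever-lower priority until the target is met or priority reaches zero; combined with the scan >>= sc->priority fragment at line 2372, each round examines a roughly doubled slice of the LRU. A quick illustration of that progression (DEF_PRIORITY == 12 and the shift-based scan size are assumptions I am carrying over from get_scan_count()):

#include <stdio.h>

#define DEF_PRIORITY 12

int main(void)
{
        unsigned long lru_size = 1UL << 20;   /* ~4 GiB of 4 KiB pages */

        for (int priority = DEF_PRIORITY; priority >= 0; priority--)
                printf("priority %2d scans ~%lu pages\n",
                       priority, lru_size >> priority);
        return 0;
}
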
3049 for_each_zone_zonelist_nodemask(zone, z, zonelist, sc->reclaim_idx, in do_try_to_free_pages()
3050 sc->nodemask) { in do_try_to_free_pages()
3051 if (zone->zone_pgdat == last_pgdat) in do_try_to_free_pages()
3053 last_pgdat = zone->zone_pgdat; in do_try_to_free_pages()
3055 snapshot_refaults(sc->target_mem_cgroup, zone->zone_pgdat); in do_try_to_free_pages()
3060 lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, in do_try_to_free_pages()
3061 zone->zone_pgdat); in do_try_to_free_pages()
3062 clear_bit(LRUVEC_CONGESTED, &lruvec->flags); in do_try_to_free_pages()
3068 if (sc->nr_reclaimed) in do_try_to_free_pages()
3069 return sc->nr_reclaimed; in do_try_to_free_pages()
3072 if (sc->compaction_ready) in do_try_to_free_pages()
3084 if (sc->skipped_deactivate) { in do_try_to_free_pages()
3085 sc->priority = initial_priority; in do_try_to_free_pages()
3086 sc->force_deactivate = 1; in do_try_to_free_pages()
3087 sc->skipped_deactivate = 0; in do_try_to_free_pages()
3092 if (sc->memcg_low_skipped) { in do_try_to_free_pages()
3093 sc->priority = initial_priority; in do_try_to_free_pages()
3094 sc->force_deactivate = 0; in do_try_to_free_pages()
3095 sc->memcg_low_reclaim = 1; in do_try_to_free_pages()
3096 sc->memcg_low_skipped = 0; in do_try_to_free_pages()
3111 if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES) in allow_direct_reclaim()
3115 zone = &pgdat->node_zones[i]; in allow_direct_reclaim()
3133 if (!wmark_ok && waitqueue_active(&pgdat->kswapd_wait)) { in allow_direct_reclaim()
3134 if (READ_ONCE(pgdat->kswapd_highest_zoneidx) > ZONE_NORMAL) in allow_direct_reclaim()
3135 WRITE_ONCE(pgdat->kswapd_highest_zoneidx, ZONE_NORMAL); in allow_direct_reclaim()
3137 wake_up_interruptible(&pgdat->kswapd_wait); in allow_direct_reclaim()
3166 if (current->flags & PF_KTHREAD) in throttle_direct_reclaim()
3196 pgdat = zone->zone_pgdat; in throttle_direct_reclaim()
3218 wait_event_interruptible_timeout(pgdat->pfmemalloc_wait, in throttle_direct_reclaim()
3225 wait_event_killable(zone->zone_pgdat->pfmemalloc_wait, in throttle_direct_reclaim()
3293 .reclaim_idx = MAX_NR_ZONES - 1, in mem_cgroup_shrink_node()
3300 WARN_ON_ONCE(!current->reclaim_state); in mem_cgroup_shrink_node()
3345 .reclaim_idx = MAX_NR_ZONES - 1, in try_to_free_mem_cgroup_pages()
3401 * Check for watermark boosts top-down as the higher zones in pgdat_watermark_boosted()
3407 for (i = highest_zoneidx; i >= 0; i--) { in pgdat_watermark_boosted()
3408 zone = pgdat->node_zones + i; in pgdat_watermark_boosted()
3412 if (zone->watermark_boost) in pgdat_watermark_boosted()
3426 unsigned long mark = -1; in pgdat_balanced()
3430 * Check watermarks bottom-up as lower zones are more likely to in pgdat_balanced()
3434 zone = pgdat->node_zones + i; in pgdat_balanced()
3446 * need balancing by definition. This can happen if a zone-restricted in pgdat_balanced()
3449 if (mark == -1) in pgdat_balanced()
3460 clear_bit(LRUVEC_CONGESTED, &lruvec->flags); in clear_pgdat_congested()
3461 clear_bit(PGDAT_DIRTY, &pgdat->flags); in clear_pgdat_congested()
3462 clear_bit(PGDAT_WRITEBACK, &pgdat->flags); in clear_pgdat_congested()
3487 if (waitqueue_active(&pgdat->pfmemalloc_wait)) in prepare_kswapd_sleep()
3488 wake_up_all(&pgdat->pfmemalloc_wait); in prepare_kswapd_sleep()
3491 if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES) in prepare_kswapd_sleep()
3517 sc->nr_to_reclaim = 0; in kswapd_shrink_node()
3518 for (z = 0; z <= sc->reclaim_idx; z++) { in kswapd_shrink_node()
3519 zone = pgdat->node_zones + z; in kswapd_shrink_node()
3523 sc->nr_to_reclaim += max(high_wmark_pages(zone), SWAP_CLUSTER_MAX); in kswapd_shrink_node()
3538 * high-order allocations. If twice the allocation size has been in kswapd_shrink_node()
3539 * reclaimed then recheck watermarks only at order-0 to prevent in kswapd_shrink_node()
3540 * excessive reclaim. Assume that a process requested a high-order in kswapd_shrink_node()
3543 if (sc->order && sc->nr_reclaimed >= compact_gap(sc->order)) in kswapd_shrink_node()
3544 sc->order = 0; in kswapd_shrink_node()
3546 return sc->nr_scanned >= sc->nr_to_reclaim; in kswapd_shrink_node()
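Lines 3517-3523 build kswapd's per-pass target by summing each eligible zone's high watermark, with SWAP_CLUSTER_MAX as a per-zone floor. A small model of that accumulation (SWAP_CLUSTER_MAX == 32 and the sample watermarks are assumptions):

#include <stdio.h>

#define SWAP_CLUSTER_MAX 32UL

static unsigned long kswapd_target(const unsigned long *high_wmark, int nr_zones)
{
        unsigned long target = 0;

        for (int z = 0; z < nr_zones; z++) {
                unsigned long want = high_wmark[z];

                /* never aim for less than one swap cluster per zone */
                target += want > SWAP_CLUSTER_MAX ? want : SWAP_CLUSTER_MAX;
        }
        return target;
}

int main(void)
{
        unsigned long wmarks[] = { 128, 4096, 64 };   /* hypothetical zones */

        printf("%lu\n", kswapd_target(wmarks, 3));
        return 0;
}
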
3556 * kswapd scans the zones in the highmem->normal->dma direction. It skips
3591 zone = pgdat->node_zones + i; in balance_pgdat()
3595 nr_boost_reclaim += zone->watermark_boost; in balance_pgdat()
3596 zone_boosts[i] = zone->watermark_boost; in balance_pgdat()
3613 * purpose -- on 64-bit systems it is expected that in balance_pgdat()
3614 * buffer_heads are stripped during active rotation. On 32-bit in balance_pgdat()
3621 for (i = MAX_NR_ZONES - 1; i >= 0; i--) { in balance_pgdat()
3622 zone = pgdat->node_zones + i; in balance_pgdat()
3636 * re-evaluate if boosting is required when kswapd next wakes. in balance_pgdat()
3653 if (nr_boost_reclaim && sc.priority == DEF_PRIORITY - 2) in balance_pgdat()
3658 * intent is to relieve pressure not issue sub-optimal IO in balance_pgdat()
3677 if (sc.priority < DEF_PRIORITY - 2) in balance_pgdat()
3700 if (waitqueue_active(&pgdat->pfmemalloc_wait) && in balance_pgdat()
3702 wake_up_all(&pgdat->pfmemalloc_wait); in balance_pgdat()
3715 nr_reclaimed = sc.nr_reclaimed - nr_reclaimed; in balance_pgdat()
3716 nr_boost_reclaim -= min(nr_boost_reclaim, nr_reclaimed); in balance_pgdat()
3727 sc.priority--; in balance_pgdat()
3731 pgdat->kswapd_failures++; in balance_pgdat()
3743 zone = pgdat->node_zones + i; in balance_pgdat()
3744 spin_lock_irqsave(&zone->lock, flags); in balance_pgdat()
3745 zone->watermark_boost -= min(zone->watermark_boost, zone_boosts[i]); in balance_pgdat()
3746 spin_unlock_irqrestore(&zone->lock, flags); in balance_pgdat()
3771 * The pgdat->kswapd_highest_zoneidx is used to pass the highest zone index to
3780 enum zone_type curr_idx = READ_ONCE(pgdat->kswapd_highest_zoneidx); in kswapd_highest_zoneidx()
3794 prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE); in kswapd_try_to_sleep()
3826 WRITE_ONCE(pgdat->kswapd_highest_zoneidx, in kswapd_try_to_sleep()
3830 if (READ_ONCE(pgdat->kswapd_order) < reclaim_order) in kswapd_try_to_sleep()
3831 WRITE_ONCE(pgdat->kswapd_order, reclaim_order); in kswapd_try_to_sleep()
3834 finish_wait(&pgdat->kswapd_wait, &wait); in kswapd_try_to_sleep()
3835 prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE); in kswapd_try_to_sleep()
3844 trace_mm_vmscan_kswapd_sleep(pgdat->node_id); in kswapd_try_to_sleep()
3851 * per-cpu vmstat threshold while kswapd is awake and restore in kswapd_try_to_sleep()
3866 finish_wait(&pgdat->kswapd_wait, &wait); in kswapd_try_to_sleep()
3879 * If there are applications that are active memory-allocators
3885 unsigned int highest_zoneidx = MAX_NR_ZONES - 1; in kswapd()
3888 const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id); in kswapd()
3908 tsk->flags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD; in kswapd()
3911 WRITE_ONCE(pgdat->kswapd_order, 0); in kswapd()
3912 WRITE_ONCE(pgdat->kswapd_highest_zoneidx, MAX_NR_ZONES); in kswapd()
3916 alloc_order = reclaim_order = READ_ONCE(pgdat->kswapd_order); in kswapd()
3925 alloc_order = reclaim_order = READ_ONCE(pgdat->kswapd_order); in kswapd()
3928 WRITE_ONCE(pgdat->kswapd_order, 0); in kswapd()
3929 WRITE_ONCE(pgdat->kswapd_highest_zoneidx, MAX_NR_ZONES); in kswapd()
3943 * Reclaim begins at the requested order but if a high-order in kswapd()
3945 * order-0. If that happens, kswapd will consider sleeping in kswapd()
3950 trace_mm_vmscan_kswapd_wake(pgdat->node_id, highest_zoneidx, in kswapd()
3964 tsk->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD); in kswapd()
3970 * A zone is low on free memory or too fragmented for high-order memory. If
3988 pgdat = zone->zone_pgdat; in wakeup_kswapd()
3989 curr_idx = READ_ONCE(pgdat->kswapd_highest_zoneidx); in wakeup_kswapd()
3992 WRITE_ONCE(pgdat->kswapd_highest_zoneidx, highest_zoneidx); in wakeup_kswapd()
3994 if (READ_ONCE(pgdat->kswapd_order) < order) in wakeup_kswapd()
3995 WRITE_ONCE(pgdat->kswapd_order, order); in wakeup_kswapd()
3997 if (!waitqueue_active(&pgdat->kswapd_wait)) in wakeup_kswapd()
4001 if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES || in wakeup_kswapd()
4006 * fragmented for high-order allocations. Wake up kcompactd in wakeup_kswapd()
4016 trace_mm_vmscan_wakeup_kswapd(pgdat->node_id, highest_zoneidx, order, in wakeup_kswapd()
4018 wake_up_interruptible(&pgdat->kswapd_wait); in wakeup_kswapd()
4023 * Try to free `nr_to_reclaim' of memory, system-wide, and return the number of
4035 .reclaim_idx = MAX_NR_ZONES - 1, in shrink_all_memory()
4061 * This kswapd start function will be called by init and node-hot-add.
4062 * On node-hot-add, kswapd will moved to proper cpus if cpus are hot-added.
4069 if (pgdat->kswapd) in kswapd_run()
4072 pgdat->kswapd = kthread_run(kswapd, pgdat, "kswapd%d", nid); in kswapd_run()
4073 if (IS_ERR(pgdat->kswapd)) { in kswapd_run()
4077 ret = PTR_ERR(pgdat->kswapd); in kswapd_run()
4078 pgdat->kswapd = NULL; in kswapd_run()
4089 struct task_struct *kswapd = NODE_DATA(nid)->kswapd; in kswapd_stop()
4093 NODE_DATA(nid)->kswapd = NULL; in kswapd_stop()
4120 * If non-zero call node_reclaim when the number of free pages falls below
4163 return (file_lru > file_mapped) ? (file_lru - file_mapped) : 0; in node_unmapped_file_pages()
4191 return nr_pagecache_reclaimable - delta; in node_pagecache_reclaimable()
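Lines 4163 and 4191 estimate how much page cache node reclaim can actually free: unmapped file pages, minus dirty pages when writeback is not permitted, clamped so the subtraction cannot underflow. An illustrative model with the RECLAIM_UNMAP / RECLAIM_WRITE mode bits reduced to booleans (parameter names are mine):

#include <stdio.h>

static unsigned long pagecache_reclaimable(unsigned long file_lru,
                                           unsigned long file_mapped,
                                           unsigned long file_dirty,
                                           int may_unmap, int may_write)
{
        unsigned long reclaimable =
                may_unmap ? file_lru
                          : (file_lru > file_mapped ? file_lru - file_mapped : 0);
        unsigned long delta = may_write ? 0 : file_dirty;

        if (delta > reclaimable)   /* guard against underflow */
                delta = reclaimable;
        return reclaimable - delta;
}

int main(void)
{
        printf("%lu\n", pagecache_reclaimable(10000, 4000, 500, 0, 0));
        return 0;
}
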
4215 trace_mm_vmscan_node_reclaim_begin(pgdat->node_id, order, in __node_reclaim()
4227 p->flags |= PF_SWAPWRITE; in __node_reclaim()
4230 if (node_pagecache_reclaimable(pgdat) > pgdat->min_unmapped_pages) { in __node_reclaim()
4241 } while (sc.nr_reclaimed < nr_pages && --sc.priority >= 0); in __node_reclaim()
4245 current->flags &= ~PF_SWAPWRITE; in __node_reclaim()
4269 if (node_pagecache_reclaimable(pgdat) <= pgdat->min_unmapped_pages && in node_reclaim()
4271 pgdat->min_slab_pages) in node_reclaim()
4277 if (!gfpflags_allow_blocking(gfp_mask) || (current->flags & PF_MEMALLOC)) in node_reclaim()
4282 * have associated processors. This will favor the local processor in node_reclaim()
4286 if (node_state(pgdat->node_id, N_CPU) && pgdat->node_id != numa_node_id()) in node_reclaim()
4289 if (test_and_set_bit(PGDAT_RECLAIM_LOCKED, &pgdat->flags)) in node_reclaim()
4293 clear_bit(PGDAT_RECLAIM_LOCKED, &pgdat->flags); in node_reclaim()
4303 * check_move_unevictable_pages - check pages for evictability and move to
4319 for (i = 0; i < pvec->nr; i++) { in check_move_unevictable_pages()
4320 struct page *page = pvec->pages[i]; in check_move_unevictable_pages()
4332 spin_unlock_irq(&pgdat->lru_lock); in check_move_unevictable_pages()
4334 spin_lock_irq(&pgdat->lru_lock); in check_move_unevictable_pages()
4355 spin_unlock_irq(&pgdat->lru_lock); in check_move_unevictable_pages()
4371 shrink_list(LRU_ACTIVE_PURGEABLE, -1, lruvec, sc); in purgeable_node()
4372 nr += shrink_list(LRU_INACTIVE_PURGEABLE, -1, lruvec, sc); in purgeable_node()
4391 .reclaim_idx = MAX_NR_ZONES - 1, in purgeable()