
Lines Matching +full:processor +full:-intensive

The excerpts below are from the kernel's mm/vmscan.c. Each entry carries its source line number and, where available, the name of the enclosing function.

1 // SPDX-License-Identifier: GPL-2.0
34 #include <linux/backing-dev.h>
73 if ((_page)->lru.prev != _base) { \
76 prev = lru_to_page(&(_page->lru)); \
77 prefetchw(&prev->_field); \
97 WARN_ON_ONCE(rs && task->reclaim_state); in set_task_reclaim_state()
99 /* Check for the nulling of an already-nulled member */ in set_task_reclaim_state()
100 WARN_ON_ONCE(!rs && !task->reclaim_state); in set_task_reclaim_state()
102 task->reclaim_state = rs; in set_task_reclaim_state()
115 int id, ret = -ENOMEM; in prealloc_memcg_shrinker()
131 shrinker->id = id; in prealloc_memcg_shrinker()
140 int id = shrinker->id; in unregister_memcg_shrinker()
151 return sc->target_mem_cgroup; in cgroup_reclaim()
155 * writeback_throttling_sane - is the usual dirty throttling mechanism available?
217 * lruvec_lru_size - Returns the number of pages on the given LRU list.
230 struct zone *zone = &lruvec_pgdat(lruvec)->node_zones[zid]; in lruvec_lru_size()
242 struct zone *zone = &lruvec_pgdat(lruvec)->node_zones[zid]; in lruvec_lru_size()
260 unsigned int size = sizeof(*shrinker->nr_deferred); in prealloc_shrinker()
262 if (shrinker->flags & SHRINKER_NUMA_AWARE) in prealloc_shrinker()
265 shrinker->nr_deferred = kzalloc(size, GFP_KERNEL); in prealloc_shrinker()
266 if (!shrinker->nr_deferred) in prealloc_shrinker()
267 return -ENOMEM; in prealloc_shrinker()
269 if (shrinker->flags & SHRINKER_MEMCG_AWARE) { in prealloc_shrinker()
277 kfree(shrinker->nr_deferred); in prealloc_shrinker()
278 shrinker->nr_deferred = NULL; in prealloc_shrinker()
279 return -ENOMEM; in prealloc_shrinker()
284 if (!shrinker->nr_deferred) in free_prealloced_shrinker()
287 if (shrinker->flags & SHRINKER_MEMCG_AWARE) { in free_prealloced_shrinker()
293 kfree(shrinker->nr_deferred); in free_prealloced_shrinker()
294 shrinker->nr_deferred = NULL; in free_prealloced_shrinker()
300 list_add_tail(&shrinker->list, &shrinker_list); in register_shrinker_prepared()
301 shrinker->flags |= SHRINKER_REGISTERED; in register_shrinker_prepared()
321 if (!(shrinker->flags & SHRINKER_REGISTERED)) in unregister_shrinker()
325 list_del(&shrinker->list); in unregister_shrinker()
326 shrinker->flags &= ~SHRINKER_REGISTERED; in unregister_shrinker()
327 if (shrinker->flags & SHRINKER_MEMCG_AWARE) in unregister_shrinker()
331 kfree(shrinker->nr_deferred); in unregister_shrinker()
332 shrinker->nr_deferred = NULL; in unregister_shrinker()
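The fragments above cover the shrinker plumbing: prealloc_shrinker() sets up the per-node nr_deferred counters (and a memcg shrinker id when SHRINKER_MEMCG_AWARE), register_shrinker_prepared() links the shrinker into shrinker_list, and unregister_shrinker() tears both down. As a rough orientation only, a minimal user of this interface looks something like the sketch below; the my_cache_* names and the object counter are purely illustrative, while register_shrinker(), SHRINK_STOP and DEFAULT_SEEKS are the real API of this kernel generation.

#include <linux/module.h>
#include <linux/shrinker.h>
#include <linux/atomic.h>

/* Illustrative driver-private cache state (not part of vmscan.c). */
static atomic_long_t my_cache_objects;

static unsigned long my_cache_count(struct shrinker *s,
                                    struct shrink_control *sc)
{
        /* Report how many objects could be freed right now. */
        return atomic_long_read(&my_cache_objects);
}

static unsigned long my_cache_scan(struct shrinker *s,
                                   struct shrink_control *sc)
{
        unsigned long freed = 0;

        /* Free up to sc->nr_to_scan objects and report how many went. */
        while (freed < sc->nr_to_scan &&
               atomic_long_add_unless(&my_cache_objects, -1, 0))
                freed++;

        return freed ? freed : SHRINK_STOP;
}

static struct shrinker my_cache_shrinker = {
        .count_objects  = my_cache_count,
        .scan_objects   = my_cache_scan,
        .seeks          = DEFAULT_SEEKS, /* feeds the delta scaling in do_shrink_slab() */
};

/* register_shrinker() preallocates ->nr_deferred and then calls
 * register_shrinker_prepared(); unregister_shrinker() undoes both. */
static int __init my_cache_init(void)
{
        return register_shrinker(&my_cache_shrinker);
}

static void __exit my_cache_exit(void)
{
        unregister_shrinker(&my_cache_shrinker);
}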
347 int nid = shrinkctl->nid; in do_shrink_slab()
348 long batch_size = shrinker->batch ? shrinker->batch in do_shrink_slab()
352 if (!(shrinker->flags & SHRINKER_NUMA_AWARE)) in do_shrink_slab()
355 freeable = shrinker->count_objects(shrinker, shrinkctl); in do_shrink_slab()
364 nr = atomic_long_xchg(&shrinker->nr_deferred[nid], 0); in do_shrink_slab()
367 if (shrinker->seeks) { in do_shrink_slab()
370 do_div(delta, shrinker->seeks); in do_shrink_slab()
383 shrinker->scan_objects, total_scan); in do_shrink_slab()
392 * shrinkers to return -1 all the time. This results in a large in do_shrink_slab()
435 shrinkctl->nr_to_scan = nr_to_scan; in do_shrink_slab()
436 shrinkctl->nr_scanned = nr_to_scan; in do_shrink_slab()
437 ret = shrinker->scan_objects(shrinker, shrinkctl); in do_shrink_slab()
442 count_vm_events(SLABS_SCANNED, shrinkctl->nr_scanned); in do_shrink_slab()
443 total_scan -= shrinkctl->nr_scanned; in do_shrink_slab()
444 scanned += shrinkctl->nr_scanned; in do_shrink_slab()
450 next_deferred -= scanned; in do_shrink_slab()
460 &shrinker->nr_deferred[nid]); in do_shrink_slab()
462 new_nr = atomic_long_read(&shrinker->nr_deferred[nid]); in do_shrink_slab()
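Tying the do_shrink_slab() fragments together: work deferred from earlier GFP-constrained passes (nr_deferred) is folded into a per-call scan target derived from the freeable count, the reclaim priority and the shrinker's seeks value. A small model of that delta computation, with illustrative numbers in the comments; the real function additionally clamps total_scan and loops over scan_objects() in batch_size chunks.

/* Model of the delta scaling seen above; not the full windup logic. */
static unsigned long shrink_delta_model(unsigned long freeable,
                                        int priority, int seeks)
{
        unsigned long delta;

        delta = freeable >> priority;   /* e.g. 10000 >> 12 (DEF_PRIORITY) = 2 */
        delta *= 4;                     /*                          2 * 4  = 8 */
        delta /= seeks;                 /*        8 / DEFAULT_SEEKS (2)    = 4 */

        return delta;                   /* objects requested from this shrinker */
}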
482 map = rcu_dereference_protected(memcg->nodeinfo[nid]->shrinker_map, in shrink_slab_memcg()
487 for_each_set_bit(i, map->map, shrinker_nr_max) { in shrink_slab_memcg()
496 if (unlikely(!shrinker || !(shrinker->flags & SHRINKER_REGISTERED))) { in shrink_slab_memcg()
498 clear_bit(i, map->map); in shrink_slab_memcg()
502 /* Call non-slab shrinkers even though kmem is disabled */ in shrink_slab_memcg()
504 !(shrinker->flags & SHRINKER_NONSLAB)) in shrink_slab_memcg()
509 clear_bit(i, map->map); in shrink_slab_memcg()
552 * shrink_slab - shrink slab caches
566 * @priority is sc->priority, we take the number of objects and >> by priority
651 * heads at page->private. in is_page_cache_freeable()
654 return page_count(page) - page_has_private(page) == 1 + page_cache_pins; in is_page_cache_freeable()
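To make the refcount arithmetic above concrete: for an isolated order-0 page that still carries buffer heads, page_cache_pins is 1 (thp_nr_pages() of a base page) and page_has_private() is 1, so the page counts as freeable only when page_count() is exactly 3: one reference held by the isolating caller, one by the page cache, and one accounted to the private buffers.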
659 if (current->flags & PF_SWAPWRITE) in may_write_to_inode()
663 if (inode_to_bdi(inode) == current->backing_dev_info) in may_write_to_inode()
670 * -ENOSPC. We need to propagate that into the address_space for a subsequent
703 * Calls ->writepage().
709 * will be non-blocking. To prevent this allocation from being in pageout()
728 * page->mapping == NULL while being dirty with clean buffers. in pageout()
739 if (mapping->a_ops->writepage == NULL) in pageout()
741 if (!may_write_to_inode(mapping->host)) in pageout()
755 res = mapping->a_ops->writepage(page, &wbc); in pageout()
789 xa_lock_irqsave(&mapping->i_pages, flags); in __remove_mapping()
809 * escape unnoticed. The smp_rmb is needed to ensure the page->flags in __remove_mapping()
810 * load is not satisfied before that of page->_refcount. in __remove_mapping()
830 xa_unlock_irqrestore(&mapping->i_pages, flags); in __remove_mapping()
835 freepage = mapping->a_ops->freepage; in __remove_mapping()
856 xa_unlock_irqrestore(&mapping->i_pages, flags); in __remove_mapping()
865 xa_unlock_irqrestore(&mapping->i_pages, flags); in __remove_mapping()
870 * Attempt to detach a locked page from its ->mapping. If it is dirty or if
890 * putback_lru_page - put previously isolated page onto appropriate LRU list
917 referenced_ptes = page_referenced(page, 1, sc->target_mem_cgroup, in page_check_references()
949 * Activate file-backed executable pages after first usage. in page_check_references()
990 if (mapping && mapping->a_ops->is_dirty_writeback) in page_check_dirty_writeback()
991 mapping->a_ops->is_dirty_writeback(page, dirty, writeback); in page_check_dirty_writeback()
1021 list_del(&page->lru); in shrink_page_list()
1031 sc->nr_scanned += nr_pages; in shrink_page_list()
1036 if (!sc->may_unmap && page_mapped(page)) in shrink_page_list()
1039 may_enter_fs = (sc->gfp_mask & __GFP_FS) || in shrink_page_list()
1040 (PageSwapCache(page) && (sc->gfp_mask & __GFP_IO)); in shrink_page_list()
1050 stat->nr_dirty++; in shrink_page_list()
1053 stat->nr_unqueued_dirty++; in shrink_page_list()
1063 inode_write_congested(mapping->host)) || in shrink_page_list()
1065 stat->nr_congested++; in shrink_page_list()
1113 test_bit(PGDAT_WRITEBACK, &pgdat->flags)) { in shrink_page_list()
1114 stat->nr_immediate++; in shrink_page_list()
1121 * This is slightly racy - end_page_writeback() in shrink_page_list()
1124 * as PageReadahead - but that does not matter in shrink_page_list()
1132 stat->nr_writeback++; in shrink_page_list()
1140 list_add_tail(&page->lru, page_list); in shrink_page_list()
1152 stat->nr_ref_keep += nr_pages; in shrink_page_list()
1166 if (!(sc->gfp_mask & __GFP_IO)) in shrink_page_list()
1217 sc->nr_scanned -= (nr_pages - 1); in shrink_page_list()
1233 stat->nr_unmap_fail += nr_pages; in shrink_page_list()
1235 stat->nr_lazyfree_fail += nr_pages; in shrink_page_list()
1244 * injecting inefficient single-page IO into in shrink_page_list()
1253 !test_bit(PGDAT_DIRTY, &pgdat->flags))) { in shrink_page_list()
1270 if (!sc->may_writepage) in shrink_page_list()
1285 stat->nr_pageout += thp_nr_pages(page); in shrink_page_list()
1293 * A synchronous write - probably a ramdisk. Go in shrink_page_list()
1318 * drop the buffers and mark the page clean - it can be freed. in shrink_page_list()
1320 * Rarely, pages can have buffers and no ->mapping. These are in shrink_page_list()
1328 if (!try_to_release_page(page, sc->gfp_mask)) in shrink_page_list()
1360 sc->target_mem_cgroup)) in shrink_page_list()
1378 list_add(&page->lru, &free_pages); in shrink_page_list()
1387 sc->nr_scanned -= (nr_pages - 1); in shrink_page_list()
1399 stat->nr_activate[type] += nr_pages; in shrink_page_list()
1405 list_add(&page->lru, &ret_pages); in shrink_page_list()
1409 pgactivate = stat->nr_activate[0] + stat->nr_activate[1]; in shrink_page_list()
1438 list_move(&page->lru, &clean_pages); in reclaim_clean_pages_from_list()
1442 nr_reclaimed = shrink_page_list(&clean_pages, zone->zone_pgdat, &sc, in reclaim_clean_pages_from_list()
1445 mod_node_page_state(zone->zone_pgdat, NR_ISOLATED_FILE, in reclaim_clean_pages_from_list()
1446 -(long)nr_reclaimed); in reclaim_clean_pages_from_list()
1453 mod_node_page_state(zone->zone_pgdat, NR_ISOLATED_ANON, in reclaim_clean_pages_from_list()
1455 mod_node_page_state(zone->zone_pgdat, NR_ISOLATED_FILE, in reclaim_clean_pages_from_list()
1456 -(long)stat.nr_lazyfree_fail); in reclaim_clean_pages_from_list()
1468 * returns 0 on success, -ve errno on failure.
1472 int ret = -EINVAL; in __isolate_lru_page()
1482 ret = -EBUSY; in __isolate_lru_page()
1487 * blocking - clean pages for the most part. in __isolate_lru_page()
1503 * ->migratepage callback are possible to migrate in __isolate_lru_page()
1514 migrate_dirty = !mapping || mapping->a_ops->migratepage; in __isolate_lru_page()
1527 * sure the page is not being freed elsewhere -- the in __isolate_lru_page()
1551 update_lru_size(lruvec, lru, zid, -nr_zone_taken[zid]); in update_lru_sizes()
1557 * pgdat->lru_lock is heavily contended. Some of the functions that
1561 * For pagecache intensive workloads, this function is the hottest
1580 struct list_head *src = &lruvec->lists[lru]; in isolate_lru_pages()
1587 isolate_mode_t mode = (sc->may_unmap ? 0 : ISOLATE_UNMAPPED); in isolate_lru_pages()
1602 if (page_zonenum(page) > sc->reclaim_idx) { in isolate_lru_pages()
1603 list_move(&page->lru, &pages_skipped); in isolate_lru_pages()
1615 * premature OOM since __isolate_lru_page() returns -EBUSY in isolate_lru_pages()
1623 list_move(&page->lru, dst); in isolate_lru_pages()
1626 case -EBUSY: in isolate_lru_pages()
1628 list_move(&page->lru, src); in isolate_lru_pages()
1656 trace_mm_vmscan_lru_isolate(sc->reclaim_idx, sc->order, nr_to_scan, in isolate_lru_pages()
1663 * isolate_lru_page - tries to isolate a page from its LRU list
1670 * Returns -EBUSY if the page was not on an LRU list.
1690 int ret = -EBUSY; in isolate_lru_page()
1699 spin_lock_irq(&pgdat->lru_lock); in isolate_lru_page()
1708 spin_unlock_irq(&pgdat->lru_lock); in isolate_lru_page()
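isolate_lru_page() is the single-page entry point: it expects the caller to already hold a reference, takes the node's lru_lock, and returns 0 with the page removed from its list (or -EBUSY if the page was not on an LRU). A hedged usage sketch, with the inspection step left abstract:

/* Usage sketch only; the caller must already hold its own page reference. */
static void inspect_page_off_lru(struct page *page)
{
        if (isolate_lru_page(page))
                return;                 /* -EBUSY: page was not on an LRU list */

        /*
         * The page is now off the LRU, pinned by the reference that
         * isolate_lru_page() took; inspect or migrate it here.
         */

        putback_lru_page(page);         /* back onto the right list, ref dropped */
}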
1741 * won't get blocked by normal direct-reclaimers, forming a circular in too_many_isolated()
1744 if ((sc->gfp_mask & (__GFP_IO | __GFP_FS)) == (__GFP_IO | __GFP_FS)) in too_many_isolated()
1761 * It is safe to rely on PG_active against the non-LRU pages in here because
1762 * nobody will play with that bit on a non-LRU page.
1764 * The downside is that we have to touch page->_refcount against each page.
1765 * But we had to alter page->flags anyway.
1786 list_del(&page->lru); in move_pages_to_lru()
1787 spin_unlock_irq(&pgdat->lru_lock); in move_pages_to_lru()
1789 spin_lock_irq(&pgdat->lru_lock); in move_pages_to_lru()
1799 list_move(&page->lru, &lruvec->lists[lru]); in move_pages_to_lru()
1807 spin_unlock_irq(&pgdat->lru_lock); in move_pages_to_lru()
1809 spin_lock_irq(&pgdat->lru_lock); in move_pages_to_lru()
1811 list_add(&page->lru, &pages_to_free); in move_pages_to_lru()
1843 * If a kernel thread (such as nfsd for loop-back mounts) services
1850 return !(current->flags & PF_LOCAL_THROTTLE) || in current_may_throttle()
1851 current->backing_dev_info == NULL || in current_may_throttle()
1852 bdi_write_congested(current->backing_dev_info); in current_may_throttle()
1877 sc->isolate_count++; in shrink_inactive_list()
1890 spin_lock_irq(&pgdat->lru_lock); in shrink_inactive_list()
1902 spin_unlock_irq(&pgdat->lru_lock); in shrink_inactive_list()
1909 spin_lock_irq(&pgdat->lru_lock); in shrink_inactive_list()
1913 __mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, -nr_taken); in shrink_inactive_list()
1930 spin_unlock_irq(&pgdat->lru_lock); in shrink_inactive_list()
1949 sc->nr.dirty += stat.nr_dirty; in shrink_inactive_list()
1950 sc->nr.congested += stat.nr_congested; in shrink_inactive_list()
1951 sc->nr.unqueued_dirty += stat.nr_unqueued_dirty; in shrink_inactive_list()
1952 sc->nr.writeback += stat.nr_writeback; in shrink_inactive_list()
1953 sc->nr.immediate += stat.nr_immediate; in shrink_inactive_list()
1954 sc->nr.taken += nr_taken; in shrink_inactive_list()
1956 sc->nr.file_taken += nr_taken; in shrink_inactive_list()
1958 trace_mm_vmscan_lru_shrink_inactive(pgdat->node_id, in shrink_inactive_list()
1959 nr_scanned, nr_reclaimed, &stat, sc->priority, file); in shrink_inactive_list()
1982 spin_lock_irq(&pgdat->lru_lock); in shrink_active_list()
1993 spin_unlock_irq(&pgdat->lru_lock); in shrink_active_list()
1998 list_del(&page->lru); in shrink_active_list()
2013 if (page_referenced(page, 0, sc->target_mem_cgroup, in shrink_active_list()
2016 * Identify referenced, file-backed active pages and in shrink_active_list()
2020 * are not likely to be evicted by use-once streaming in shrink_active_list()
2026 list_add(&page->lru, &l_active); in shrink_active_list()
2031 ClearPageActive(page); /* we are de-activating */ in shrink_active_list()
2033 list_add(&page->lru, &l_inactive); in shrink_active_list()
2039 spin_lock_irq(&pgdat->lru_lock); in shrink_active_list()
2049 __mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, -nr_taken); in shrink_active_list()
2050 spin_unlock_irq(&pgdat->lru_lock); in shrink_active_list()
2054 trace_mm_vmscan_lru_shrink_active(pgdat->node_id, nr_taken, nr_activate, in shrink_active_list()
2055 nr_deactivate, nr_rotated, sc->priority, file); in shrink_active_list()
2082 list_move(&page->lru, &node_page_list); in reclaim_pages()
2091 list_del(&page->lru); in reclaim_pages()
2104 list_del(&page->lru); in reclaim_pages()
2116 if (sc->may_deactivate & (1 << is_file_lru(lru))) in shrink_list()
2119 sc->skipped_deactivate = 1; in shrink_list()
2131 * to the established workingset on the scan-resistant active list,
2145 * -------------------------------------
2164 gb = (inactive + active) >> (30 - PAGE_SHIFT); in inactive_is_low()
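The inactive_is_low() fragments above size the inactive list against the active one; the target ratio grows with the square root of the lists' combined size, so larger machines keep proportionally smaller inactive lists. A sketch of that check, assuming the int_sqrt()-based ratio this kernel generation uses: for roughly 4GB on the lists, gb = 4 and the ratio is int_sqrt(40) = 6, i.e. inactive is "low" below about 1/7 of the total.

#include <linux/kernel.h>       /* int_sqrt() */
#include <linux/mm.h>           /* PAGE_SHIFT */

/* Model of the balance check; the real code reads per-zone LRU sizes. */
static bool inactive_is_low_model(unsigned long inactive, unsigned long active)
{
        unsigned long gb, inactive_ratio;

        gb = (inactive + active) >> (30 - PAGE_SHIFT);  /* combined size in GB */
        if (gb)
                inactive_ratio = int_sqrt(10 * gb);
        else
                inactive_ratio = 1;

        return inactive * inactive_ratio < active;
}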
2196 if (!sc->may_swap || mem_cgroup_get_nr_swap_pages(memcg) <= 0) { in get_scan_count()
2218 if (!sc->priority && swappiness) { in get_scan_count()
2224 * If the system is almost out of file pages, force-scan anon. in get_scan_count()
2226 if (sc->file_is_tiny) { in get_scan_count()
2235 if (sc->cache_trim_mode) { in get_scan_count()
2256 total_cost = sc->anon_cost + sc->file_cost; in get_scan_count()
2257 anon_cost = total_cost + sc->anon_cost; in get_scan_count()
2258 file_cost = total_cost + sc->file_cost; in get_scan_count()
2264 fp = (200 - swappiness) * (total_cost + 1); in get_scan_count()
2277 lruvec_size = lruvec_lru_size(lruvec, lru, sc->reclaim_idx); in get_scan_count()
2278 mem_cgroup_protection(sc->target_mem_cgroup, memcg, in get_scan_count()
2288 * becomes extremely binary -- from nothing as we in get_scan_count()
2303 * the best-effort low protection. However, we still in get_scan_count()
2304 * ideally want to honor how well-behaved groups are in in get_scan_count()
2315 if (!sc->memcg_low_reclaim && low > min) { in get_scan_count()
2317 sc->memcg_low_skipped = 1; in get_scan_count()
2325 scan = lruvec_size - lruvec_size * protection / in get_scan_count()
2331 * sc->priority further than desirable. in get_scan_count()
2338 scan >>= sc->priority; in get_scan_count()
2357 * round-off error. in get_scan_count()
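The cost terms in get_scan_count() come from lruvec->anon_cost and ->file_cost (snapshotted into sc->anon_cost / sc->file_cost in shrink_node()), which track recent paging I/O and rotation cost; together with swappiness they set the relative scan pressure on the two LRU types. A rough model of that split: with swappiness 60, anon cost 100 and file cost 300, the result is ap = 48 and fp = 80, so about 62% of the pressure lands on the file LRUs.

/* Model of the fraction[] computation suggested by the fragments above. */
static void scan_balance_model(unsigned int swappiness,
                               unsigned long anon_io_cost,
                               unsigned long file_io_cost,
                               unsigned long fraction[2])
{
        unsigned long total_cost = anon_io_cost + file_io_cost;
        unsigned long anon_cost  = total_cost + anon_io_cost;
        unsigned long file_cost  = total_cost + file_io_cost;

        fraction[0] = swappiness * (total_cost + 1) / (anon_cost + 1);
        fraction[1] = (200 - swappiness) * (total_cost + 1) / (file_cost + 1);
}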
2386 unsigned long nr_to_reclaim = sc->nr_to_reclaim; in shrink_lruvec()
2407 sc->priority == DEF_PRIORITY); in shrink_lruvec()
2418 nr[lru] -= nr_to_scan; in shrink_lruvec()
2470 nr_scanned = targets[lru] - nr[lru]; in shrink_lruvec()
2471 nr[lru] = targets[lru] * (100 - percentage) / 100; in shrink_lruvec()
2472 nr[lru] -= min(nr[lru], nr_scanned); in shrink_lruvec()
2475 nr_scanned = targets[lru] - nr[lru]; in shrink_lruvec()
2476 nr[lru] = targets[lru] * (100 - percentage) / 100; in shrink_lruvec()
2477 nr[lru] -= min(nr[lru], nr_scanned); in shrink_lruvec()
2482 sc->nr_reclaimed += nr_reclaimed; in shrink_lruvec()
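Worked example of the proportional trim in the fragments just above: suppose a list was targeted at 1000 pages, 400 have been scanned so far (nr[lru] = 600), and reclaim is ahead of schedule so the remaining lists should be cut back by percentage = 30. Then nr[lru] is recomputed as 1000 * (100 - 30) / 100 = 700, and subtracting min(700, 400) leaves 300 pages still to scan on that list.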
2497 if (IS_ENABLED(CONFIG_COMPACTION) && sc->order && in in_reclaim_compaction()
2498 (sc->order > PAGE_ALLOC_COSTLY_ORDER || in in_reclaim_compaction()
2499 sc->priority < DEF_PRIORITY - 2)) in in_reclaim_compaction()
2506 * Reclaim/compaction is used for high-order allocation requests. It reclaims
2507 * order-0 pages before compacting the zone. should_continue_reclaim() returns
2530 * first, by assuming that zero delta of sc->nr_scanned means full LRU in should_continue_reclaim()
2532 * where always a non-zero amount of pages were scanned. in should_continue_reclaim()
2538 for (z = 0; z <= sc->reclaim_idx; z++) { in should_continue_reclaim()
2539 struct zone *zone = &pgdat->node_zones[z]; in should_continue_reclaim()
2543 switch (compaction_suitable(zone, sc->order, 0, sc->reclaim_idx)) { in should_continue_reclaim()
2557 pages_for_compaction = compact_gap(sc->order); in should_continue_reclaim()
2568 struct mem_cgroup *target_memcg = sc->target_mem_cgroup; in shrink_node_memcgs()
2578 * This loop can become CPU-bound when target memcgs in shrink_node_memcgs()
2579 * aren't eligible for reclaim - either because they in shrink_node_memcgs()
2600 if (!sc->memcg_low_reclaim) { in shrink_node_memcgs()
2601 sc->memcg_low_skipped = 1; in shrink_node_memcgs()
2607 reclaimed = sc->nr_reclaimed; in shrink_node_memcgs()
2608 scanned = sc->nr_scanned; in shrink_node_memcgs()
2612 shrink_slab(sc->gfp_mask, pgdat->node_id, memcg, in shrink_node_memcgs()
2613 sc->priority); in shrink_node_memcgs()
2616 vmpressure(sc->gfp_mask, memcg, false, in shrink_node_memcgs()
2617 sc->nr_scanned - scanned, in shrink_node_memcgs()
2618 sc->nr_reclaimed - reclaimed); in shrink_node_memcgs()
2625 struct reclaim_state *reclaim_state = current->reclaim_state; in shrink_node()
2631 target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat); in shrink_node()
2634 memset(&sc->nr, 0, sizeof(sc->nr)); in shrink_node()
2636 nr_reclaimed = sc->nr_reclaimed; in shrink_node()
2637 nr_scanned = sc->nr_scanned; in shrink_node()
2642 spin_lock_irq(&pgdat->lru_lock); in shrink_node()
2643 sc->anon_cost = target_lruvec->anon_cost; in shrink_node()
2644 sc->file_cost = target_lruvec->file_cost; in shrink_node()
2645 spin_unlock_irq(&pgdat->lru_lock); in shrink_node()
2651 if (!sc->force_deactivate) { in shrink_node()
2656 if (refaults != target_lruvec->refaults[0] || in shrink_node()
2658 sc->may_deactivate |= DEACTIVATE_ANON; in shrink_node()
2660 sc->may_deactivate &= ~DEACTIVATE_ANON; in shrink_node()
2669 if (refaults != target_lruvec->refaults[1] || in shrink_node()
2671 sc->may_deactivate |= DEACTIVATE_FILE; in shrink_node()
2673 sc->may_deactivate &= ~DEACTIVATE_FILE; in shrink_node()
2675 sc->may_deactivate = DEACTIVATE_ANON | DEACTIVATE_FILE; in shrink_node()
2683 if (file >> sc->priority && !(sc->may_deactivate & DEACTIVATE_FILE)) in shrink_node()
2684 sc->cache_trim_mode = 1; in shrink_node()
2686 sc->cache_trim_mode = 0; in shrink_node()
2702 free = sum_zone_node_page_state(pgdat->node_id, NR_FREE_PAGES); in shrink_node()
2707 struct zone *zone = &pgdat->node_zones[z]; in shrink_node()
2721 sc->file_is_tiny = in shrink_node()
2723 !(sc->may_deactivate & DEACTIVATE_ANON) && in shrink_node()
2724 anon >> sc->priority; in shrink_node()
2730 sc->nr_reclaimed += reclaim_state->reclaimed_slab; in shrink_node()
2731 reclaim_state->reclaimed_slab = 0; in shrink_node()
2735 vmpressure(sc->gfp_mask, sc->target_mem_cgroup, true, in shrink_node()
2736 sc->nr_scanned - nr_scanned, in shrink_node()
2737 sc->nr_reclaimed - nr_reclaimed); in shrink_node()
2739 if (sc->nr_reclaimed - nr_reclaimed) in shrink_node()
2745 * it implies that the long-lived page allocation rate in shrink_node()
2760 if (sc->nr.writeback && sc->nr.writeback == sc->nr.taken) in shrink_node()
2761 set_bit(PGDAT_WRITEBACK, &pgdat->flags); in shrink_node()
2764 if (sc->nr.unqueued_dirty == sc->nr.file_taken) in shrink_node()
2765 set_bit(PGDAT_DIRTY, &pgdat->flags); in shrink_node()
2773 if (sc->nr.immediate) in shrink_node()
2787 sc->nr.dirty && sc->nr.dirty == sc->nr.congested) in shrink_node()
2788 set_bit(LRUVEC_CONGESTED, &target_lruvec->flags); in shrink_node()
2797 !sc->hibernation_mode && in shrink_node()
2798 test_bit(LRUVEC_CONGESTED, &target_lruvec->flags)) in shrink_node()
2801 if (should_continue_reclaim(pgdat, sc->nr_reclaimed - nr_reclaimed, in shrink_node()
2812 pgdat->kswapd_failures = 0; in shrink_node()
2817 * Returns true if compaction should go ahead for a costly-order request, or
2826 suitable = compaction_suitable(zone, sc->order, 0, sc->reclaim_idx); in compaction_ready()
2843 watermark = high_wmark_pages(zone) + compact_gap(sc->order); in compaction_ready()
2845 return zone_watermark_ok_safe(zone, 0, watermark, sc->reclaim_idx); in compaction_ready()
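compaction_ready() stops reclaiming for a costly-order request once the zone has compact_gap(sc->order) pages of headroom above the high watermark. compact_gap() lives outside this file but is simply twice the allocation size (2UL << order), so for an order-9 request (a 2MB huge page with 4KB pages) the test asks for high_wmark_pages(zone) plus 1024 free pages before declaring compaction ready.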
2849 * This is the direct reclaim path, for page-allocating processes. We only
2870 orig_mask = sc->gfp_mask; in shrink_zones()
2872 sc->gfp_mask |= __GFP_HIGHMEM; in shrink_zones()
2873 sc->reclaim_idx = gfp_zone(sc->gfp_mask); in shrink_zones()
2877 sc->reclaim_idx, sc->nodemask) { in shrink_zones()
2891 * non-zero order, only frequent costly order in shrink_zones()
2897 sc->order > PAGE_ALLOC_COSTLY_ORDER && in shrink_zones()
2899 sc->compaction_ready = true; in shrink_zones()
2909 if (zone->zone_pgdat == last_pgdat) in shrink_zones()
2919 nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone->zone_pgdat, in shrink_zones()
2920 sc->order, sc->gfp_mask, in shrink_zones()
2922 sc->nr_reclaimed += nr_soft_reclaimed; in shrink_zones()
2923 sc->nr_scanned += nr_soft_scanned; in shrink_zones()
2928 if (zone->zone_pgdat == last_pgdat) in shrink_zones()
2930 last_pgdat = zone->zone_pgdat; in shrink_zones()
2932 shrink_node_hyperhold(zone->zone_pgdat, sc); in shrink_zones()
2934 shrink_node(zone->zone_pgdat, sc); in shrink_zones()
2942 sc->gfp_mask = orig_mask; in shrink_zones()
2954 lruvec->refaults[0] = lruvec_page_state(lruvec, WORKINGSET_ACTIVATE_ANON); /* modified */ in snapshot_refaults()
2955 lruvec->refaults[1] = lruvec_page_state(lruvec, WORKINGSET_ACTIVATE_FILE); /* modified */ in snapshot_refaults()
2960 target_lruvec->refaults[0] = refaults; in snapshot_refaults()
2962 target_lruvec->refaults[1] = refaults; in snapshot_refaults()
2972 * high - the zone may be full of dirty or under-writeback pages, which this
2984 int initial_priority = sc->priority; in do_try_to_free_pages()
2992 __count_zid_vm_events(ALLOCSTALL, sc->reclaim_idx, 1); in do_try_to_free_pages()
2995 vmpressure_prio(sc->gfp_mask, sc->target_mem_cgroup, in do_try_to_free_pages()
2996 sc->priority); in do_try_to_free_pages()
2997 sc->nr_scanned = 0; in do_try_to_free_pages()
3000 if (sc->nr_reclaimed >= sc->nr_to_reclaim) in do_try_to_free_pages()
3003 if (sc->compaction_ready) in do_try_to_free_pages()
3010 if (sc->priority < DEF_PRIORITY - 2) in do_try_to_free_pages()
3011 sc->may_writepage = 1; in do_try_to_free_pages()
3012 } while (--sc->priority >= 0); in do_try_to_free_pages()
3015 for_each_zone_zonelist_nodemask(zone, z, zonelist, sc->reclaim_idx, in do_try_to_free_pages()
3016 sc->nodemask) { in do_try_to_free_pages()
3017 if (zone->zone_pgdat == last_pgdat) in do_try_to_free_pages()
3019 last_pgdat = zone->zone_pgdat; in do_try_to_free_pages()
3021 snapshot_refaults(sc->target_mem_cgroup, zone->zone_pgdat); in do_try_to_free_pages()
3026 lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, in do_try_to_free_pages()
3027 zone->zone_pgdat); in do_try_to_free_pages()
3028 clear_bit(LRUVEC_CONGESTED, &lruvec->flags); in do_try_to_free_pages()
3034 if (sc->nr_reclaimed) in do_try_to_free_pages()
3035 return sc->nr_reclaimed; in do_try_to_free_pages()
3038 if (sc->compaction_ready) in do_try_to_free_pages()
3050 if (sc->skipped_deactivate) { in do_try_to_free_pages()
3051 sc->priority = initial_priority; in do_try_to_free_pages()
3052 sc->force_deactivate = 1; in do_try_to_free_pages()
3053 sc->skipped_deactivate = 0; in do_try_to_free_pages()
3058 if (sc->memcg_low_skipped) { in do_try_to_free_pages()
3059 sc->priority = initial_priority; in do_try_to_free_pages()
3060 sc->force_deactivate = 0; in do_try_to_free_pages()
3061 sc->memcg_low_reclaim = 1; in do_try_to_free_pages()
3062 sc->memcg_low_skipped = 0; in do_try_to_free_pages()
3077 if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES) in allow_direct_reclaim()
3081 zone = &pgdat->node_zones[i]; in allow_direct_reclaim()
3099 if (!wmark_ok && waitqueue_active(&pgdat->kswapd_wait)) { in allow_direct_reclaim()
3100 if (READ_ONCE(pgdat->kswapd_highest_zoneidx) > ZONE_NORMAL) in allow_direct_reclaim()
3101 WRITE_ONCE(pgdat->kswapd_highest_zoneidx, ZONE_NORMAL); in allow_direct_reclaim()
3103 wake_up_interruptible(&pgdat->kswapd_wait); in allow_direct_reclaim()
3132 if (current->flags & PF_KTHREAD) in throttle_direct_reclaim()
3162 pgdat = zone->zone_pgdat; in throttle_direct_reclaim()
3184 wait_event_interruptible_timeout(pgdat->pfmemalloc_wait, in throttle_direct_reclaim()
3191 wait_event_killable(zone->zone_pgdat->pfmemalloc_wait, in throttle_direct_reclaim()
3259 .reclaim_idx = MAX_NR_ZONES - 1, in mem_cgroup_shrink_node()
3266 WARN_ON_ONCE(!current->reclaim_state); in mem_cgroup_shrink_node()
3311 .reclaim_idx = MAX_NR_ZONES - 1, in try_to_free_mem_cgroup_pages()
3367 * Check for watermark boosts top-down as the higher zones in pgdat_watermark_boosted()
3373 for (i = highest_zoneidx; i >= 0; i--) { in pgdat_watermark_boosted()
3374 zone = pgdat->node_zones + i; in pgdat_watermark_boosted()
3378 if (zone->watermark_boost) in pgdat_watermark_boosted()
3392 unsigned long mark = -1; in pgdat_balanced()
3396 * Check watermarks bottom-up as lower zones are more likely to in pgdat_balanced()
3400 zone = pgdat->node_zones + i; in pgdat_balanced()
3412 * need balancing by definition. This can happen if a zone-restricted in pgdat_balanced()
3415 if (mark == -1) in pgdat_balanced()
3426 clear_bit(LRUVEC_CONGESTED, &lruvec->flags); in clear_pgdat_congested()
3427 clear_bit(PGDAT_DIRTY, &pgdat->flags); in clear_pgdat_congested()
3428 clear_bit(PGDAT_WRITEBACK, &pgdat->flags); in clear_pgdat_congested()
3453 if (waitqueue_active(&pgdat->pfmemalloc_wait)) in prepare_kswapd_sleep()
3454 wake_up_all(&pgdat->pfmemalloc_wait); in prepare_kswapd_sleep()
3457 if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES) in prepare_kswapd_sleep()
3483 sc->nr_to_reclaim = 0; in kswapd_shrink_node()
3484 for (z = 0; z <= sc->reclaim_idx; z++) { in kswapd_shrink_node()
3485 zone = pgdat->node_zones + z; in kswapd_shrink_node()
3489 sc->nr_to_reclaim += max(high_wmark_pages(zone), SWAP_CLUSTER_MAX); in kswapd_shrink_node()
3504 * high-order allocations. If twice the allocation size has been in kswapd_shrink_node()
3505 * reclaimed then recheck watermarks only at order-0 to prevent in kswapd_shrink_node()
3506 * excessive reclaim. Assume that a process requested a high-order in kswapd_shrink_node()
3509 if (sc->order && sc->nr_reclaimed >= compact_gap(sc->order)) in kswapd_shrink_node()
3510 sc->order = 0; in kswapd_shrink_node()
3512 return sc->nr_scanned >= sc->nr_to_reclaim; in kswapd_shrink_node()
3522 * kswapd scans the zones in the highmem->normal->dma direction. It skips
3557 zone = pgdat->node_zones + i; in balance_pgdat()
3561 nr_boost_reclaim += zone->watermark_boost; in balance_pgdat()
3562 zone_boosts[i] = zone->watermark_boost; in balance_pgdat()
3579 * purpose -- on 64-bit systems it is expected that in balance_pgdat()
3580 * buffer_heads are stripped during active rotation. On 32-bit in balance_pgdat()
3587 for (i = MAX_NR_ZONES - 1; i >= 0; i--) { in balance_pgdat()
3588 zone = pgdat->node_zones + i; in balance_pgdat()
3602 * re-evaluate if boosting is required when kswapd next wakes. in balance_pgdat()
3619 if (nr_boost_reclaim && sc.priority == DEF_PRIORITY - 2) in balance_pgdat()
3624 * intent is to relieve pressure not issue sub-optimal IO in balance_pgdat()
3643 if (sc.priority < DEF_PRIORITY - 2) in balance_pgdat()
3666 if (waitqueue_active(&pgdat->pfmemalloc_wait) && in balance_pgdat()
3668 wake_up_all(&pgdat->pfmemalloc_wait); in balance_pgdat()
3681 nr_reclaimed = sc.nr_reclaimed - nr_reclaimed; in balance_pgdat()
3682 nr_boost_reclaim -= min(nr_boost_reclaim, nr_reclaimed); in balance_pgdat()
3693 sc.priority--; in balance_pgdat()
3697 pgdat->kswapd_failures++; in balance_pgdat()
3709 zone = pgdat->node_zones + i; in balance_pgdat()
3710 spin_lock_irqsave(&zone->lock, flags); in balance_pgdat()
3711 zone->watermark_boost -= min(zone->watermark_boost, zone_boosts[i]); in balance_pgdat()
3712 spin_unlock_irqrestore(&zone->lock, flags); in balance_pgdat()
3737 * The pgdat->kswapd_highest_zoneidx is used to pass the highest zone index to
3746 enum zone_type curr_idx = READ_ONCE(pgdat->kswapd_highest_zoneidx); in kswapd_highest_zoneidx()
3760 prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE); in kswapd_try_to_sleep()
3792 WRITE_ONCE(pgdat->kswapd_highest_zoneidx, in kswapd_try_to_sleep()
3796 if (READ_ONCE(pgdat->kswapd_order) < reclaim_order) in kswapd_try_to_sleep()
3797 WRITE_ONCE(pgdat->kswapd_order, reclaim_order); in kswapd_try_to_sleep()
3800 finish_wait(&pgdat->kswapd_wait, &wait); in kswapd_try_to_sleep()
3801 prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE); in kswapd_try_to_sleep()
3810 trace_mm_vmscan_kswapd_sleep(pgdat->node_id); in kswapd_try_to_sleep()
3817 * per-cpu vmstat threshold while kswapd is awake and restore in kswapd_try_to_sleep()
3832 finish_wait(&pgdat->kswapd_wait, &wait); in kswapd_try_to_sleep()
3845 * If there are applications that are active memory-allocators
3851 unsigned int highest_zoneidx = MAX_NR_ZONES - 1; in kswapd()
3854 const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id); in kswapd()
3871 tsk->flags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD; in kswapd()
3874 WRITE_ONCE(pgdat->kswapd_order, 0); in kswapd()
3875 WRITE_ONCE(pgdat->kswapd_highest_zoneidx, MAX_NR_ZONES); in kswapd()
3879 alloc_order = reclaim_order = READ_ONCE(pgdat->kswapd_order); in kswapd()
3888 alloc_order = reclaim_order = READ_ONCE(pgdat->kswapd_order); in kswapd()
3891 WRITE_ONCE(pgdat->kswapd_order, 0); in kswapd()
3892 WRITE_ONCE(pgdat->kswapd_highest_zoneidx, MAX_NR_ZONES); in kswapd()
3906 * Reclaim begins at the requested order but if a high-order in kswapd()
3908 * order-0. If that happens, kswapd will consider sleeping in kswapd()
3913 trace_mm_vmscan_kswapd_wake(pgdat->node_id, highest_zoneidx, in kswapd()
3921 tsk->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD); in kswapd()
3927 * A zone is low on free memory or too fragmented for high-order memory. If
3945 pgdat = zone->zone_pgdat; in wakeup_kswapd()
3946 curr_idx = READ_ONCE(pgdat->kswapd_highest_zoneidx); in wakeup_kswapd()
3949 WRITE_ONCE(pgdat->kswapd_highest_zoneidx, highest_zoneidx); in wakeup_kswapd()
3951 if (READ_ONCE(pgdat->kswapd_order) < order) in wakeup_kswapd()
3952 WRITE_ONCE(pgdat->kswapd_order, order); in wakeup_kswapd()
3954 if (!waitqueue_active(&pgdat->kswapd_wait)) in wakeup_kswapd()
3958 if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES || in wakeup_kswapd()
3963 * fragmented for high-order allocations. Wake up kcompactd in wakeup_kswapd()
3973 trace_mm_vmscan_wakeup_kswapd(pgdat->node_id, highest_zoneidx, order, in wakeup_kswapd()
3975 wake_up_interruptible(&pgdat->kswapd_wait); in wakeup_kswapd()
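wakeup_kswapd() is the producer side of the kswapd_wait queue used by kswapd_try_to_sleep() above: it records the highest usable zone index and the requested order in the pgdat, then wakes the per-node thread (or kicks kcompactd directly when the node is merely fragmented). A hedged sketch of a caller, modelled on the page allocator's wake_all_kswapds() but without its per-node deduplication:

/* Sketch of a caller: nudge kswapd for every zone usable by the request. */
static void wake_kswapds_model(struct zonelist *zonelist, gfp_t gfp_mask,
                               unsigned int order,
                               enum zone_type highest_zoneidx,
                               nodemask_t *nodemask)
{
        struct zoneref *z;
        struct zone *zone;

        for_each_zone_zonelist_nodemask(zone, z, zonelist, highest_zoneidx,
                                        nodemask)
                wakeup_kswapd(zone, gfp_mask, order, highest_zoneidx);
}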
3980 * Try to free `nr_to_reclaim' of memory, system-wide, and return the number of
3992 .reclaim_idx = MAX_NR_ZONES - 1, in shrink_all_memory()
4018 * This kswapd start function will be called by init and node-hot-add.
4019 * On node-hot-add, kswapd will moved to proper cpus if cpus are hot-added.
4026 if (pgdat->kswapd) in kswapd_run()
4029 pgdat->kswapd = kthread_run(kswapd, pgdat, "kswapd%d", nid); in kswapd_run()
4030 if (IS_ERR(pgdat->kswapd)) { in kswapd_run()
4034 ret = PTR_ERR(pgdat->kswapd); in kswapd_run()
4035 pgdat->kswapd = NULL; in kswapd_run()
4046 struct task_struct *kswapd = NODE_DATA(nid)->kswapd; in kswapd_stop()
4050 NODE_DATA(nid)->kswapd = NULL; in kswapd_stop()
4070 * If non-zero call node_reclaim when the number of free pages falls below
4113 return (file_lru > file_mapped) ? (file_lru - file_mapped) : 0; in node_unmapped_file_pages()
4141 return nr_pagecache_reclaimable - delta; in node_pagecache_reclaimable()
4165 trace_mm_vmscan_node_reclaim_begin(pgdat->node_id, order, in __node_reclaim()
4177 p->flags |= PF_SWAPWRITE; in __node_reclaim()
4180 if (node_pagecache_reclaimable(pgdat) > pgdat->min_unmapped_pages) { in __node_reclaim()
4191 } while (sc.nr_reclaimed < nr_pages && --sc.priority >= 0); in __node_reclaim()
4195 current->flags &= ~PF_SWAPWRITE; in __node_reclaim()
4219 if (node_pagecache_reclaimable(pgdat) <= pgdat->min_unmapped_pages && in node_reclaim()
4221 pgdat->min_slab_pages) in node_reclaim()
4227 if (!gfpflags_allow_blocking(gfp_mask) || (current->flags & PF_MEMALLOC)) in node_reclaim()
4232 * have associated processors. This will favor the local processor in node_reclaim()
4236 if (node_state(pgdat->node_id, N_CPU) && pgdat->node_id != numa_node_id()) in node_reclaim()
4239 if (test_and_set_bit(PGDAT_RECLAIM_LOCKED, &pgdat->flags)) in node_reclaim()
4243 clear_bit(PGDAT_RECLAIM_LOCKED, &pgdat->flags); in node_reclaim()
4253 * check_move_unevictable_pages - check pages for evictability and move to
4269 for (i = 0; i < pvec->nr; i++) { in check_move_unevictable_pages()
4270 struct page *page = pvec->pages[i]; in check_move_unevictable_pages()
4282 spin_unlock_irq(&pgdat->lru_lock); in check_move_unevictable_pages()
4284 spin_lock_irq(&pgdat->lru_lock); in check_move_unevictable_pages()
4305 spin_unlock_irq(&pgdat->lru_lock); in check_move_unevictable_pages()