1 /* SPDX-License-Identifier: GPL-2.0 */
2 #ifndef _LINUX_MMZONE_H
3 #define _LINUX_MMZONE_H
4
5 #ifndef __ASSEMBLY__
6 #ifndef __GENERATING_BOUNDS_H
7
8 #include <linux/spinlock.h>
9 #include <linux/list.h>
10 #include <linux/wait.h>
11 #include <linux/bitops.h>
12 #include <linux/cache.h>
13 #include <linux/threads.h>
14 #include <linux/numa.h>
15 #include <linux/init.h>
16 #include <linux/seqlock.h>
17 #include <linux/nodemask.h>
18 #include <linux/pageblock-flags.h>
19 #include <linux/page-flags-layout.h>
20 #include <linux/atomic.h>
21 #include <linux/mm_types.h>
22 #include <linux/page-flags.h>
23 #include <linux/local_lock.h>
24 #include <linux/android_kabi.h>
25 #include <asm/page.h>
26
27 /* Free memory management - zoned buddy allocator. */
28 #ifndef CONFIG_FORCE_MAX_ZONEORDER
29 #define MAX_ORDER 11
30 #else
31 #define MAX_ORDER CONFIG_FORCE_MAX_ZONEORDER
32 #endif
33 #define MAX_ORDER_NR_PAGES (1 << (MAX_ORDER - 1))
34
35 /*
36 * PAGE_ALLOC_COSTLY_ORDER is the order at which allocations are deemed
37 * costly to service. That is between allocation orders which should
38 * coalesce naturally under reasonable reclaim pressure and those which
39 * will not.
40 */
41 #define PAGE_ALLOC_COSTLY_ORDER 3
42
43 #define MAX_KSWAPD_THREADS 16
44
45 enum migratetype {
46 MIGRATE_UNMOVABLE,
47 MIGRATE_MOVABLE,
48 MIGRATE_RECLAIMABLE,
49 #ifdef CONFIG_CMA
50 /*
51 * MIGRATE_CMA migration type is designed to mimic the way
52 * ZONE_MOVABLE works. Only movable pages can be allocated
53 * from MIGRATE_CMA pageblocks and the page allocator never
54 * implicitly changes the migration type of a MIGRATE_CMA pageblock.
55 *
56 * The way to use it is to change the migratetype of a range of
57 * pageblocks to MIGRATE_CMA, which can be done by the
58 * __free_pageblock_cma() function. What is important though
59 * is that a range of pageblocks must be aligned to
60 * MAX_ORDER_NR_PAGES should the biggest page be bigger than
61 * a single pageblock.
62 */
63 MIGRATE_CMA,
64 #endif
65 MIGRATE_PCPTYPES, /* the number of types on the pcp lists */
66 MIGRATE_HIGHATOMIC = MIGRATE_PCPTYPES,
67 #ifdef CONFIG_MEMORY_ISOLATION
68 MIGRATE_ISOLATE, /* can't allocate from here */
69 #endif
70 MIGRATE_TYPES
71 };
72
73 /* In mm/page_alloc.c; keep in sync also with show_migration_types() there */
74 extern const char * const migratetype_names[MIGRATE_TYPES];
75
76 #ifdef CONFIG_CMA
77 # define is_migrate_cma(migratetype) unlikely((migratetype) == MIGRATE_CMA)
78 # define is_migrate_cma_page(_page) (get_pageblock_migratetype(_page) == MIGRATE_CMA)
79 # define get_cma_migrate_type() MIGRATE_CMA
80 #else
81 # define is_migrate_cma(migratetype) false
82 # define is_migrate_cma_page(_page) false
83 # define get_cma_migrate_type() MIGRATE_MOVABLE
84 #endif
85
86 static inline bool is_migrate_movable(int mt)
87 {
88 return is_migrate_cma(mt) || mt == MIGRATE_MOVABLE;
89 }
90
91 #define for_each_migratetype_order(order, type) \
92 for (order = 0; order < MAX_ORDER; order++) \
93 for (type = 0; type < MIGRATE_TYPES; type++)
94
95 extern int page_group_by_mobility_disabled;
96
97 #define MIGRATETYPE_MASK ((1UL << PB_migratetype_bits) - 1)
98
99 #define get_pageblock_migratetype(page) \
100 get_pfnblock_flags_mask(page, page_to_pfn(page), MIGRATETYPE_MASK)
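
/*
 * Usage sketch (illustrative only, not a definition used elsewhere):
 * classifying the pageblock backing a page with the helpers above;
 * "page" is a hypothetical, valid struct page pointer.
 *
 *    int mt = get_pageblock_migratetype(page);
 *
 *    if (is_migrate_cma(mt))
 *        ;    // CMA pageblock: only movable allocations may come from it
 *    else if (is_migrate_movable(mt))
 *        ;    // ordinary movable pageblock
 */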
101
102 struct free_area {
103 struct list_head free_list[MIGRATE_TYPES];
104 unsigned long nr_free;
105 };
106
107 static inline struct page *get_page_from_free_area(struct free_area *area,
108 int migratetype)
109 {
110 return list_first_entry_or_null(&area->free_list[migratetype],
111 struct page, lru);
112 }
113
114 static inline bool free_area_empty(struct free_area *area, int migratetype)
115 {
116 return list_empty(&area->free_list[migratetype]);
117 }
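
/*
 * Usage sketch (illustrative): walking the buddy free lists of a zone with
 * the helpers above, e.g. for debug reporting. It assumes the caller holds
 * zone->lock; "zone" and the loop variables are hypothetical locals.
 *
 *    unsigned int order;
 *    int mt;
 *
 *    for_each_migratetype_order(order, mt) {
 *        struct free_area *area = &zone->free_area[order];
 *        struct page *page = get_page_from_free_area(area, mt);
 *
 *        if (page)
 *            ;    // first free block of this order/migratetype
 *    }
 */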
118
119 struct pglist_data;
120
121 /*
122 * Add a wild amount of padding here to ensure data fall into separate
123 * cachelines. There are very few zone structures in the machine, so space
124 * consumption is not a concern here.
125 */
126 #if defined(CONFIG_SMP)
127 struct zone_padding {
128 char x[0];
129 } ____cacheline_internodealigned_in_smp;
130 #define ZONE_PADDING(name) struct zone_padding name;
131 #else
132 #define ZONE_PADDING(name)
133 #endif
134
135 #ifdef CONFIG_NUMA
136 enum numa_stat_item {
137 NUMA_HIT, /* allocated in intended node */
138 NUMA_MISS, /* allocated in a non-intended node */
139 NUMA_FOREIGN, /* was intended here, hit elsewhere */
140 NUMA_INTERLEAVE_HIT, /* interleaver preferred this zone */
141 NUMA_LOCAL, /* allocation from local node */
142 NUMA_OTHER, /* allocation from other node */
143 NR_VM_NUMA_EVENT_ITEMS
144 };
145 #else
146 #define NR_VM_NUMA_EVENT_ITEMS 0
147 #endif
148
149 enum zone_stat_item {
150 /* First 128 byte cacheline (assuming 64 bit words) */
151 NR_FREE_PAGES,
152 NR_ZONE_LRU_BASE, /* Used only for compaction and reclaim retry */
153 NR_ZONE_INACTIVE_ANON = NR_ZONE_LRU_BASE,
154 NR_ZONE_ACTIVE_ANON,
155 NR_ZONE_INACTIVE_FILE,
156 NR_ZONE_ACTIVE_FILE,
157 NR_ZONE_UNEVICTABLE,
158 NR_ZONE_WRITE_PENDING, /* Count of dirty, writeback and unstable pages */
159 NR_MLOCK, /* mlock()ed pages found and moved off LRU */
160 /* Second 128 byte cacheline */
161 NR_BOUNCE,
162 NR_ZSPAGES, /* allocated in zsmalloc */
163 NR_FREE_CMA_PAGES,
164 NR_VM_ZONE_STAT_ITEMS };
165
166 enum node_stat_item {
167 NR_LRU_BASE,
168 NR_INACTIVE_ANON = NR_LRU_BASE, /* must match order of LRU_[IN]ACTIVE */
169 NR_ACTIVE_ANON, /* " " " " " */
170 NR_INACTIVE_FILE, /* " " " " " */
171 NR_ACTIVE_FILE, /* " " " " " */
172 NR_UNEVICTABLE, /* " " " " " */
173 NR_SLAB_RECLAIMABLE_B,
174 NR_SLAB_UNRECLAIMABLE_B,
175 NR_ISOLATED_ANON, /* Temporary isolated pages from anon lru */
176 NR_ISOLATED_FILE, /* Temporary isolated pages from file lru */
177 WORKINGSET_NODES,
178 WORKINGSET_REFAULT_BASE,
179 WORKINGSET_REFAULT_ANON = WORKINGSET_REFAULT_BASE,
180 WORKINGSET_REFAULT_FILE,
181 WORKINGSET_ACTIVATE_BASE,
182 WORKINGSET_ACTIVATE_ANON = WORKINGSET_ACTIVATE_BASE,
183 WORKINGSET_ACTIVATE_FILE,
184 WORKINGSET_RESTORE_BASE,
185 WORKINGSET_RESTORE_ANON = WORKINGSET_RESTORE_BASE,
186 WORKINGSET_RESTORE_FILE,
187 WORKINGSET_NODERECLAIM,
188 NR_ANON_MAPPED, /* Mapped anonymous pages */
189 NR_FILE_MAPPED, /* pagecache pages mapped into pagetables.
190 only modified from process context */
191 NR_FILE_PAGES,
192 NR_FILE_DIRTY,
193 NR_WRITEBACK,
194 NR_WRITEBACK_TEMP, /* Writeback using temporary buffers */
195 NR_SHMEM, /* shmem pages (includes tmpfs/GEM pages) */
196 NR_SHMEM_THPS,
197 NR_SHMEM_PMDMAPPED,
198 NR_FILE_THPS,
199 NR_FILE_PMDMAPPED,
200 NR_ANON_THPS,
201 NR_VMSCAN_WRITE,
202 NR_VMSCAN_IMMEDIATE, /* Prioritise for reclaim when writeback ends */
203 NR_DIRTIED, /* page dirtyings since bootup */
204 NR_WRITTEN, /* page writings since bootup */
205 NR_KERNEL_MISC_RECLAIMABLE, /* reclaimable non-slab kernel pages */
206 NR_FOLL_PIN_ACQUIRED, /* via: pin_user_page(), gup flag: FOLL_PIN */
207 NR_FOLL_PIN_RELEASED, /* pages returned via unpin_user_page() */
208 NR_KERNEL_STACK_KB, /* measured in KiB */
209 #if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
210 NR_KERNEL_SCS_KB, /* measured in KiB */
211 #endif
212 NR_PAGETABLE, /* used for pagetables */
213 #ifdef CONFIG_SWAP
214 NR_SWAPCACHE,
215 #endif
216 NR_VM_NODE_STAT_ITEMS
217 };
218
219 /*
220 * Returns true if the item should be printed in THPs (/proc/vmstat
221 * currently prints the number of anon, file and shmem THPs, but the item
222 * is charged in pages).
223 */
224 static __always_inline bool vmstat_item_print_in_thp(enum node_stat_item item)
225 {
226 if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
227 return false;
228
229 return item == NR_ANON_THPS ||
230 item == NR_FILE_THPS ||
231 item == NR_SHMEM_THPS ||
232 item == NR_SHMEM_PMDMAPPED ||
233 item == NR_FILE_PMDMAPPED;
234 }
235
236 /*
237 * Returns true if the value is measured in bytes (most vmstat values are
238 * measured in pages). This defines the API part, the internal representation
239 * might be different.
240 */
241 static __always_inline bool vmstat_item_in_bytes(int idx)
242 {
243 /*
244 * Global and per-node slab counters track slab pages.
245 * It's expected that changes are multiples of PAGE_SIZE.
246 * Internally values are stored in pages.
247 *
248 * Per-memcg and per-lruvec counters track memory, consumed
249 * by individual slab objects. These counters are actually
250 * byte-precise.
251 */
252 return (idx == NR_SLAB_RECLAIMABLE_B ||
253 idx == NR_SLAB_UNRECLAIMABLE_B);
254 }
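
/*
 * Worked example (illustrative): callers that want a page count must scale
 * byte-based items; everything else is already in pages. "value" is a
 * hypothetical counter value obtained through the byte-based API for "idx".
 *
 *    unsigned long nr_pages;
 *
 *    if (vmstat_item_in_bytes(idx))
 *        nr_pages = value >> PAGE_SHIFT;    // NR_SLAB_*_B are byte-based
 *    else
 *        nr_pages = value;                  // already a page count
 */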
255
256 /*
257 * We do arithmetic on the LRU lists in various places in the code,
258 * so it is important to keep the active lists LRU_ACTIVE higher in
259 * the array than the corresponding inactive lists, and to keep
260 * the *_FILE lists LRU_FILE higher than the corresponding _ANON lists.
261 *
262 * This has to be kept in sync with the statistics in zone_stat_item
263 * above and the descriptions in vmstat_text in mm/vmstat.c
264 */
265 #define LRU_BASE 0
266 #define LRU_ACTIVE 1
267 #define LRU_FILE 2
268
269 enum lru_list {
270 LRU_INACTIVE_ANON = LRU_BASE,
271 LRU_ACTIVE_ANON = LRU_BASE + LRU_ACTIVE,
272 LRU_INACTIVE_FILE = LRU_BASE + LRU_FILE,
273 LRU_ACTIVE_FILE = LRU_BASE + LRU_FILE + LRU_ACTIVE,
274 LRU_UNEVICTABLE,
275 NR_LRU_LISTS
276 };
277
278 #define for_each_lru(lru) for (lru = 0; lru < NR_LRU_LISTS; lru++)
279
280 #define for_each_evictable_lru(lru) for (lru = 0; lru <= LRU_ACTIVE_FILE; lru++)
281
282 static inline bool is_file_lru(enum lru_list lru)
283 {
284 return (lru == LRU_INACTIVE_FILE || lru == LRU_ACTIVE_FILE);
285 }
286
287 static inline bool is_active_lru(enum lru_list lru)
288 {
289 return (lru == LRU_ACTIVE_ANON || lru == LRU_ACTIVE_FILE);
290 }
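
/*
 * Illustrative sketch: the enum layout above lets an LRU index be computed
 * arithmetically from the "file" and "active" properties, and the helpers
 * recover those properties from an index. "file", "active" and the loop are
 * hypothetical; the real index helpers live in linux/mm_inline.h.
 *
 *    // forward: properties -> index
 *    enum lru_list lru = LRU_BASE + file * LRU_FILE + active * LRU_ACTIVE;
 *
 *    // backward: index -> properties
 *    for_each_evictable_lru(lru)
 *        pr_debug("lru %d: file=%d active=%d\n",
 *                 lru, is_file_lru(lru), is_active_lru(lru));
 */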
291
292 #define ANON_AND_FILE 2
293
294 enum lruvec_flags {
295 LRUVEC_CONGESTED, /* lruvec has many dirty pages
296 * backed by a congested BDI
297 */
298 };
299
300 #endif /* !__GENERATING_BOUNDS_H */
301
302 /*
303 * Evictable pages are divided into multiple generations. The youngest and the
304 * oldest generation numbers, max_seq and min_seq, are monotonically increasing.
305 * They form a sliding window of a variable size [MIN_NR_GENS, MAX_NR_GENS]. An
306 * offset within MAX_NR_GENS, i.e., gen, indexes the LRU list of the
307 * corresponding generation. The gen counter in page->flags stores gen+1 while
308 * a page is on one of lrugen->lists[]. Otherwise it stores 0.
309 *
310 * A page is added to the youngest generation on faulting. The aging needs to
311 * check the accessed bit at least twice before handing this page over to the
312 * eviction. The first check takes care of the accessed bit set on the initial
313 * fault; the second check makes sure this page hasn't been used since then.
314 * This process, AKA second chance, requires a minimum of two generations,
315 * hence MIN_NR_GENS. And to maintain ABI compatibility with the active/inactive
316 * LRU, e.g., /proc/vmstat, these two generations are considered active; the
317 * rest of generations, if they exist, are considered inactive. See
318 * lru_gen_is_active().
319 *
320 * PG_active is always cleared while a page is on one of lrugen->lists[] so that
321 * the aging need not worry about it. And it's set again when a page
322 * considered active is isolated for non-reclaiming purposes, e.g., migration.
323 * See lru_gen_add_page() and lru_gen_del_page().
324 *
325 * MAX_NR_GENS is set to 4 so that the multi-gen LRU can support twice the
326 * number of categories of the active/inactive LRU when keeping track of
327 * accesses through page tables. This requires order_base_2(MAX_NR_GENS+1) bits
328 * in page->flags.
329 */
330 #define MIN_NR_GENS 2U
331 #define MAX_NR_GENS 4U
332
333 /*
334 * Each generation is divided into multiple tiers. A page accessed N times
335 * through file descriptors is in tier order_base_2(N). A page in the first tier
336 * (N=0,1) is marked by PG_referenced unless it was faulted in through page
337 * tables or read ahead. A page in any other tier (N>1) is marked by
338 * PG_referenced and PG_workingset. This implies a minimum of two tiers is
339 * supported without using additional bits in page->flags.
340 *
341 * In contrast to moving across generations which requires the LRU lock, moving
342 * across tiers only involves atomic operations on page->flags and therefore
343 * has a negligible cost in the buffered access path. In the eviction path,
344 * comparisons of refaulted/(evicted+protected) from the first tier and the
345 * rest infer whether pages accessed multiple times through file descriptors
346 * are statistically hot and thus worth protecting.
347 *
348 * MAX_NR_TIERS is set to 4 so that the multi-gen LRU can support twice the
349 * number of categories of the active/inactive LRU when keeping track of
350 * accesses through file descriptors. This uses MAX_NR_TIERS-2 spare bits in
351 * page->flags.
352 */
353 #define MAX_NR_TIERS 4U
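
/*
 * Worked example (informational, derived from the comment above): with
 * MAX_NR_TIERS == 4, N accesses through file descriptors map to tiers
 * roughly as order_base_2(N), saturating at the last tier:
 *
 *    N = 0-1  -> tier 0    (PG_referenced only)
 *    N = 2    -> tier 1    (PG_referenced + PG_workingset)
 *    N = 3-4  -> tier 2
 *    N >= 5   -> tier 3
 */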
354
355 #ifndef __GENERATING_BOUNDS_H
356
357 struct lruvec;
358 struct page_vma_mapped_walk;
359
360 #define LRU_GEN_MASK ((BIT(LRU_GEN_WIDTH) - 1) << LRU_GEN_PGOFF)
361 #define LRU_REFS_MASK ((BIT(LRU_REFS_WIDTH) - 1) << LRU_REFS_PGOFF)
362
363 #ifdef CONFIG_LRU_GEN
364
365 enum {
366 LRU_GEN_ANON,
367 LRU_GEN_FILE,
368 };
369
370 enum {
371 LRU_GEN_CORE,
372 LRU_GEN_MM_WALK,
373 LRU_GEN_NONLEAF_YOUNG,
374 NR_LRU_GEN_CAPS
375 };
376
377 #define MIN_LRU_BATCH BITS_PER_LONG
378 #define MAX_LRU_BATCH (MIN_LRU_BATCH * 64)
379
380 /* whether to keep historical stats from evicted generations */
381 #ifdef CONFIG_LRU_GEN_STATS
382 #define NR_HIST_GENS MAX_NR_GENS
383 #else
384 #define NR_HIST_GENS 1U
385 #endif
386
387 /*
388 * The youngest generation number is stored in max_seq for both anon and file
389 * types as they are aged on an equal footing. The oldest generation numbers are
390 * stored in min_seq[] separately for anon and file types as clean file pages
391 * can be evicted regardless of swap constraints.
392 *
393 * Normally anon and file min_seq are in sync. But if swapping is constrained,
394 * e.g., out of swap space, file min_seq is allowed to advance and leave anon
395 * min_seq behind.
396 *
397 * The number of pages in each generation is eventually consistent and therefore
398 * can be transiently negative when reset_batch_size() is pending.
399 */
400 struct lru_gen_struct {
401 /* the aging increments the youngest generation number */
402 unsigned long max_seq;
403 /* the eviction increments the oldest generation numbers */
404 unsigned long min_seq[ANON_AND_FILE];
405 /* the birth time of each generation in jiffies */
406 unsigned long timestamps[MAX_NR_GENS];
407 /* the multi-gen LRU lists, lazily sorted on eviction */
408 struct list_head lists[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];
409 /* the multi-gen LRU sizes, eventually consistent */
410 unsigned long nr_pages[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];
411 /* the exponential moving average of refaulted */
412 unsigned long avg_refaulted[ANON_AND_FILE][MAX_NR_TIERS];
413 /* the exponential moving average of evicted+protected */
414 unsigned long avg_total[ANON_AND_FILE][MAX_NR_TIERS];
415 /* the first tier doesn't need protection, hence the minus one */
416 unsigned long protected[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS - 1];
417 /* can be modified without holding the LRU lock */
418 atomic_long_t evicted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS];
419 atomic_long_t refaulted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS];
420 /* whether the multi-gen LRU is enabled */
421 bool enabled;
422 };
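
/*
 * Illustrative sketch (assumes the lru_gen_from_seq() helper from
 * linux/mm_inline.h, which reduces a sequence number modulo MAX_NR_GENS):
 * summing the pages currently tracked in the oldest file generation.
 *
 *    struct lru_gen_struct *lrugen = &lruvec->lrugen;
 *    int gen = lru_gen_from_seq(lrugen->min_seq[LRU_GEN_FILE]);
 *    long total = 0;
 *    int zone;
 *
 *    for (zone = 0; zone < MAX_NR_ZONES; zone++)
 *        total += READ_ONCE(lrugen->nr_pages[gen][LRU_GEN_FILE][zone]);
 *    // may be transiently negative, see the "eventually consistent" note above
 */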
423
424 enum {
425 MM_LEAF_TOTAL, /* total leaf entries */
426 MM_LEAF_OLD, /* old leaf entries */
427 MM_LEAF_YOUNG, /* young leaf entries */
428 MM_NONLEAF_TOTAL, /* total non-leaf entries */
429 MM_NONLEAF_FOUND, /* non-leaf entries found in Bloom filters */
430 MM_NONLEAF_ADDED, /* non-leaf entries added to Bloom filters */
431 NR_MM_STATS
432 };
433
434 /* double-buffering Bloom filters */
435 #define NR_BLOOM_FILTERS 2
436
437 struct lru_gen_mm_state {
438 /* set to max_seq after each iteration */
439 unsigned long seq;
440 /* where the current iteration continues after */
441 struct list_head *head;
442 /* where the last iteration ended before */
443 struct list_head *tail;
444 /* Unused - keep for ABI compatibility */
445 struct wait_queue_head wait;
446 /* Bloom filters flip after each iteration */
447 unsigned long *filters[NR_BLOOM_FILTERS];
448 /* the mm stats for debugging */
449 unsigned long stats[NR_HIST_GENS][NR_MM_STATS];
450 /* Unused - keep for ABI compatibility */
451 int nr_walkers;
452 };
453
454 struct lru_gen_mm_walk {
455 /* the lruvec under reclaim */
456 struct lruvec *lruvec;
457 /* unstable max_seq from lru_gen_struct */
458 unsigned long max_seq;
459 /* the next address within an mm to scan */
460 unsigned long next_addr;
461 /* Unused -- for ABI compatibility */
462 unsigned long bitmap[BITS_TO_LONGS(MIN_LRU_BATCH)];
463 /* to batch promoted pages */
464 int nr_pages[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];
465 /* to batch the mm stats */
466 int mm_stats[NR_MM_STATS];
467 /* total batched items */
468 int batched;
469 bool can_swap;
470 bool full_scan;
471 };
472
473 void lru_gen_init_lruvec(struct lruvec *lruvec);
474 void lru_gen_look_around(struct page_vma_mapped_walk *pvmw);
475
476 #ifdef CONFIG_MEMCG
477 void lru_gen_init_memcg(struct mem_cgroup *memcg);
478 void lru_gen_exit_memcg(struct mem_cgroup *memcg);
479 #endif
480
481 #else /* !CONFIG_LRU_GEN */
482
483 static inline void lru_gen_init_lruvec(struct lruvec *lruvec)
484 {
485 }
486
487 static inline void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
488 {
489 }
490
491 #ifdef CONFIG_MEMCG
492 static inline void lru_gen_init_memcg(struct mem_cgroup *memcg)
493 {
494 }
495
496 static inline void lru_gen_exit_memcg(struct mem_cgroup *memcg)
497 {
498 }
499 #endif
500
501 #endif /* CONFIG_LRU_GEN */
502
503 struct lruvec {
504 struct list_head lists[NR_LRU_LISTS];
505 /* per lruvec lru_lock for memcg */
506 spinlock_t lru_lock;
507 /*
508 * These track the cost of reclaiming one LRU - file or anon -
509 * over the other. As the observed cost of reclaiming one LRU
510 * increases, the reclaim scan balance tips toward the other.
511 */
512 unsigned long anon_cost;
513 unsigned long file_cost;
514 /* Non-resident age, driven by LRU movement */
515 atomic_long_t nonresident_age;
516 /* Refaults at the time of last reclaim cycle */
517 unsigned long refaults[ANON_AND_FILE];
518 /* Various lruvec state flags (enum lruvec_flags) */
519 unsigned long flags;
520 #ifdef CONFIG_LRU_GEN
521 /* evictable pages divided into generations */
522 struct lru_gen_struct lrugen;
523 /* to concurrently iterate lru_gen_mm_list */
524 struct lru_gen_mm_state mm_state;
525 #endif
526 #ifdef CONFIG_MEMCG
527 struct pglist_data *pgdat;
528 #endif
529 ANDROID_VENDOR_DATA(1);
530 };
531
532 /* Isolate unmapped pages */
533 #define ISOLATE_UNMAPPED ((__force isolate_mode_t)0x2)
534 /* Isolate for asynchronous migration */
535 #define ISOLATE_ASYNC_MIGRATE ((__force isolate_mode_t)0x4)
536 /* Isolate unevictable pages */
537 #define ISOLATE_UNEVICTABLE ((__force isolate_mode_t)0x8)
538
539 /* LRU Isolation modes. */
540 typedef unsigned __bitwise isolate_mode_t;
541
542 enum zone_watermarks {
543 WMARK_MIN,
544 WMARK_LOW,
545 WMARK_HIGH,
546 NR_WMARK
547 };
548
549 /*
550 * One list per migratetype for each order 0..PAGE_ALLOC_COSTLY_ORDER, plus
551 * one additional list per migratetype for pageblock-order THP if configured.
552 */
553 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
554 #define NR_PCP_THP 1
555 #else
556 #define NR_PCP_THP 0
557 #endif
558 #define NR_PCP_LISTS (MIGRATE_PCPTYPES * (PAGE_ALLOC_COSTLY_ORDER + 1 + NR_PCP_THP))
559
560 /*
561 * Shift to encode migratetype and order in the same integer, with order
562 * in the least significant bits.
563 */
564 #define NR_PCP_ORDER_WIDTH 8
565 #define NR_PCP_ORDER_MASK ((1<<NR_PCP_ORDER_WIDTH) - 1)
566
567 #define min_wmark_pages(z) (z->_watermark[WMARK_MIN] + z->watermark_boost)
568 #define low_wmark_pages(z) (z->_watermark[WMARK_LOW] + z->watermark_boost)
569 #define high_wmark_pages(z) (z->_watermark[WMARK_HIGH] + z->watermark_boost)
570 #define wmark_pages(z, i) (z->_watermark[i] + z->watermark_boost)
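
/*
 * Usage sketch (illustrative; zone_page_state() is declared in
 * linux/vmstat.h): a coarse check of a zone against its (boosted) low
 * watermark, ignoring lowmem_reserve and the per-cpu drift handled by
 * zone_watermark_ok_safe().
 *
 *    unsigned long free = zone_page_state(zone, NR_FREE_PAGES);
 *
 *    if (free < low_wmark_pages(zone))
 *        ;    // kswapd would normally be woken for this zone
 */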
571
572 /* Fields and list protected by pagesets local_lock in page_alloc.c */
573 struct per_cpu_pages {
574 int count; /* number of pages in the list */
575 int high; /* high watermark, emptying needed */
576 int batch; /* chunk size for buddy add/remove */
577 short free_factor; /* batch scaling factor during free */
578 #ifdef CONFIG_NUMA
579 short expire; /* When 0, remote pagesets are drained */
580 #endif
581
582 /* Lists of pages, one per migrate type stored on the pcp-lists */
583 struct list_head lists[NR_PCP_LISTS];
584 };
585
586 struct per_cpu_pages_ext {
587 spinlock_t lock; /* Protects pcp.lists field */
588 struct per_cpu_pages pcp;
589 };
590
591 struct per_cpu_zonestat {
592 #ifdef CONFIG_SMP
593 s8 vm_stat_diff[NR_VM_ZONE_STAT_ITEMS];
594 s8 stat_threshold;
595 #endif
596 #ifdef CONFIG_NUMA
597 /*
598 * Low priority inaccurate counters that are only folded
599 * on demand. Use a large type to avoid the overhead of
600 * folding during refresh_cpu_vm_stats.
601 */
602 unsigned long vm_numa_event[NR_VM_NUMA_EVENT_ITEMS];
603 #endif
604 };
605
606 struct per_cpu_nodestat {
607 s8 stat_threshold;
608 s8 vm_node_stat_diff[NR_VM_NODE_STAT_ITEMS];
609 };
610
611 #endif /* !__GENERATING_BOUNDS_H */
612
613 enum zone_type {
614 /*
615 * ZONE_DMA and ZONE_DMA32 are used when there are peripherals not able
616 * to DMA to all of the addressable memory (ZONE_NORMAL).
617 * On architectures where this area covers the whole 32 bit address
618 * space ZONE_DMA32 is used. ZONE_DMA is left for the ones with smaller
619 * DMA addressing constraints. This distinction is important as a 32bit
620 * DMA mask is assumed when ZONE_DMA32 is defined. Some 64-bit
621 * platforms may need both zones as they support peripherals with
622 * different DMA addressing limitations.
623 */
624 #ifdef CONFIG_ZONE_DMA
625 ZONE_DMA,
626 #endif
627 #ifdef CONFIG_ZONE_DMA32
628 ZONE_DMA32,
629 #endif
630 /*
631 * Normal addressable memory is in ZONE_NORMAL. DMA operations can be
632 * performed on pages in ZONE_NORMAL if the DMA devices support
633 * transfers to all addressable memory.
634 */
635 ZONE_NORMAL,
636 #ifdef CONFIG_HIGHMEM
637 /*
638 * A memory area that is only addressable by the kernel through
639 * mapping portions into its own address space. This is for example
640 * used by i386 to allow the kernel to address the memory beyond
641 * 900MB. The kernel will set up special mappings (page
642 * table entries on i386) for each page that the kernel needs to
643 * access.
644 */
645 ZONE_HIGHMEM,
646 #endif
647 /*
648 * ZONE_MOVABLE is similar to ZONE_NORMAL, except that it contains
649 * movable pages with few exceptional cases described below. Main use
650 * cases for ZONE_MOVABLE are to make memory offlining/unplug more
651 * likely to succeed, and to locally limit unmovable allocations - e.g.,
652 * to increase the number of THP/huge pages. Notable special cases are:
653 *
654 * 1. Pinned pages: (long-term) pinning of movable pages might
655 * essentially turn such pages unmovable. Therefore, we do not allow
656 * pinning long-term pages in ZONE_MOVABLE. When pages are pinned and
657 * faulted, they come from the right zone right away. However, it is
658 * still possible that address space already has pages in
659 * ZONE_MOVABLE at the time when pages are pinned (i.e. the user has
660 * touched that memory before pinning). In such a case we migrate them
661 * to a different zone. When migration fails - pinning fails.
662 * 2. memblock allocations: kernelcore/movablecore setups might create
663 * situations where ZONE_MOVABLE contains unmovable allocations
664 * after boot. Memory offlining and allocations fail early.
665 * 3. Memory holes: kernelcore/movablecore setups might create very rare
666 * situations where ZONE_MOVABLE contains memory holes after boot,
667 * for example, if we have sections that are only partially
668 * populated. Memory offlining and allocations fail early.
669 * 4. PG_hwpoison pages: while poisoned pages can be skipped during
670 * memory offlining, such pages cannot be allocated.
671 * 5. Unmovable PG_offline pages: in paravirtualized environments,
672 * hotplugged memory blocks might only partially be managed by the
673 * buddy (e.g., via XEN-balloon, Hyper-V balloon, virtio-mem). The
674 * parts not managed by the buddy are unmovable PG_offline pages. In
675 * some cases (virtio-mem), such pages can be skipped during
676 * memory offlining, however, cannot be moved/allocated. These
677 * techniques might use alloc_contig_range() to hide previously
678 * exposed pages from the buddy again (e.g., to implement some sort
679 * of memory unplug in virtio-mem).
680 * 6. ZERO_PAGE(0): kernelcore/movablecore setups might create
681 * situations where ZERO_PAGE(0), which is allocated differently
682 * on different platforms, may end up in a movable zone. ZERO_PAGE(0)
683 * cannot be migrated.
684 * 7. Memory-hotplug: when using memmap_on_memory and onlining the
685 * memory to the MOVABLE zone, the vmemmap pages are also placed in
686 * such zone. Such pages cannot be really moved around as they are
687 * self-stored in the range, but they are treated as movable when
688 * the range they describe is about to be offlined.
689 *
690 * In general, no unmovable allocations that degrade memory offlining
691 * should end up in ZONE_MOVABLE. Allocators (like alloc_contig_range())
692 * have to expect that migrating pages in ZONE_MOVABLE can fail (even
693 * if has_unmovable_pages() states that there are no unmovable pages,
694 * there can be false negatives).
695 */
696 ZONE_MOVABLE,
697 #ifdef CONFIG_ZONE_DEVICE
698 ZONE_DEVICE,
699 #endif
700 __MAX_NR_ZONES
701
702 };
703
704 #ifndef __GENERATING_BOUNDS_H
705
706 #define ASYNC_AND_SYNC 2
707
708 struct zone {
709 /* Read-mostly fields */
710
711 /* zone watermarks, access with *_wmark_pages(zone) macros */
712 unsigned long _watermark[NR_WMARK];
713 unsigned long watermark_boost;
714
715 unsigned long nr_reserved_highatomic;
716
717 /*
718 * We don't know if the memory that we're going to allocate will be
719 * freeable and/or whether it will be released eventually, so to avoid
720 * totally wasting several GB of RAM we must reserve some of the lower zone
721 * memory (otherwise we risk running OOM on the lower zones despite
722 * there being tons of freeable RAM on the higher zones). This array is
723 * recalculated at runtime if the sysctl_lowmem_reserve_ratio sysctl
724 * changes.
725 */
726 long lowmem_reserve[MAX_NR_ZONES];
727
728 #ifdef CONFIG_NUMA
729 int node;
730 #endif
731 struct pglist_data *zone_pgdat;
732 struct per_cpu_pages __percpu *per_cpu_pageset;
733 struct per_cpu_zonestat __percpu *per_cpu_zonestats;
734 /*
735 * the high and batch values are copied to individual pagesets for
736 * faster access
737 */
738 int pageset_high;
739 int pageset_batch;
740
741 #ifndef CONFIG_SPARSEMEM
742 /*
743 * Flags for a pageblock_nr_pages block. See pageblock-flags.h.
744 * In SPARSEMEM, this map is stored in struct mem_section
745 */
746 unsigned long *pageblock_flags;
747 #endif /* CONFIG_SPARSEMEM */
748
749 /* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */
750 unsigned long zone_start_pfn;
751
752 /*
753 * spanned_pages is the total pages spanned by the zone, including
754 * holes, which is calculated as:
755 * spanned_pages = zone_end_pfn - zone_start_pfn;
756 *
757 * present_pages is physical pages existing within the zone, which
758 * is calculated as:
759 * present_pages = spanned_pages - absent_pages(pages in holes);
760 *
761 * present_early_pages is present pages existing within the zone
762 * located on memory available since early boot, excluding hotplugged
763 * memory.
764 *
765 * managed_pages is present pages managed by the buddy system, which
766 * is calculated as (reserved_pages includes pages allocated by the
767 * bootmem allocator):
768 * managed_pages = present_pages - reserved_pages;
769 *
770 * cma pages is present pages that are assigned for CMA use
771 * (MIGRATE_CMA).
772 *
773 * So present_pages may be used by memory hotplug or memory power
774 * management logic to figure out unmanaged pages by checking
775 * (present_pages - managed_pages). And managed_pages should be used
776 * by page allocator and vm scanner to calculate all kinds of watermarks
777 * and thresholds.
778 *
779 * Locking rules:
780 *
781 * zone_start_pfn and spanned_pages are protected by span_seqlock.
782 * It is a seqlock because it has to be read outside of zone->lock,
783 * and it is done in the main allocator path. But, it is written
784 * quite infrequently.
785 *
786 * The span_seq lock is declared along with zone->lock because it is
787 * frequently read in proximity to zone->lock. It's good to
788 * give them a chance of being in the same cacheline.
789 *
790 * Write access to present_pages at runtime should be protected by
791 * mem_hotplug_begin/end(). Any reader who can't tolerate drift of
792 * present_pages should use get_online_mems() to get a stable value.
793 */
794 atomic_long_t managed_pages;
795 unsigned long spanned_pages;
796 unsigned long present_pages;
797 #if defined(CONFIG_MEMORY_HOTPLUG)
798 unsigned long present_early_pages;
799 #endif
800 #ifdef CONFIG_CMA
801 unsigned long cma_pages;
802 #endif
803
804 const char *name;
805
806 #ifdef CONFIG_MEMORY_ISOLATION
807 /*
808 * Number of isolated pageblocks. It is used to solve an incorrect
809 * freepage counting problem caused by racily retrieving the
810 * migratetype of a pageblock. Protected by zone->lock.
811 */
812 unsigned long nr_isolate_pageblock;
813 #endif
814
815 #ifdef CONFIG_MEMORY_HOTPLUG
816 /* see spanned/present_pages for more description */
817 seqlock_t span_seqlock;
818 #endif
819
820 int initialized;
821
822 /* Write-intensive fields used from the page allocator */
823 ZONE_PADDING(_pad1_)
824
825 /* free areas of different sizes */
826 struct free_area free_area[MAX_ORDER];
827
828 /* zone flags, see below */
829 unsigned long flags;
830
831 /* Primarily protects free_area */
832 spinlock_t lock;
833
834 /* Write-intensive fields used by compaction and vmstats. */
835 ZONE_PADDING(_pad2_)
836
837 /*
838 * When free pages are below this point, additional steps are taken
839 * when reading the number of free pages to avoid per-cpu counter
840 * drift allowing watermarks to be breached
841 */
842 unsigned long percpu_drift_mark;
843
844 #if defined CONFIG_COMPACTION || defined CONFIG_CMA
845 /* pfn where compaction free scanner should start */
846 unsigned long compact_cached_free_pfn;
847 /* pfn where compaction migration scanner should start */
848 unsigned long compact_cached_migrate_pfn[ASYNC_AND_SYNC];
849 unsigned long compact_init_migrate_pfn;
850 unsigned long compact_init_free_pfn;
851 #endif
852
853 #ifdef CONFIG_COMPACTION
854 /*
855 * On compaction failure, 1<<compact_defer_shift compactions
856 * are skipped before trying again. The number attempted since
857 * last failure is tracked with compact_considered.
858 * compact_order_failed is the minimum compaction failed order.
859 */
860 unsigned int compact_considered;
861 unsigned int compact_defer_shift;
862 int compact_order_failed;
863 #endif
864
865 #if defined CONFIG_COMPACTION || defined CONFIG_CMA
866 /* Set to true when the PG_migrate_skip bits should be cleared */
867 bool compact_blockskip_flush;
868 #endif
869
870 bool contiguous;
871
872 ZONE_PADDING(_pad3_)
873 /* Zone statistics */
874 atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS];
875 atomic_long_t vm_numa_event[NR_VM_NUMA_EVENT_ITEMS];
876
877 ANDROID_KABI_RESERVE(1);
878 ANDROID_KABI_RESERVE(2);
879 ANDROID_KABI_RESERVE(3);
880 ANDROID_KABI_RESERVE(4);
881 } ____cacheline_internodealigned_in_smp;
882
883 enum pgdat_flags {
884 PGDAT_DIRTY, /* reclaim scanning has recently found
885 * many dirty file pages at the tail
886 * of the LRU.
887 */
888 PGDAT_WRITEBACK, /* reclaim scanning has recently found
889 * many pages under writeback
890 */
891 PGDAT_RECLAIM_LOCKED, /* prevents concurrent reclaim */
892 };
893
894 enum zone_flags {
895 ZONE_BOOSTED_WATERMARK, /* zone recently boosted watermarks.
896 * Cleared when kswapd is woken.
897 */
898 ZONE_RECLAIM_ACTIVE, /* kswapd may be scanning the zone. */
899 };
900
901 static inline unsigned long zone_managed_pages(struct zone *zone)
902 {
903 return (unsigned long)atomic_long_read(&zone->managed_pages);
904 }
905
906 static inline unsigned long zone_cma_pages(struct zone *zone)
907 {
908 #ifdef CONFIG_CMA
909 return zone->cma_pages;
910 #else
911 return 0;
912 #endif
913 }
914
915 static inline unsigned long zone_end_pfn(const struct zone *zone)
916 {
917 return zone->zone_start_pfn + zone->spanned_pages;
918 }
919
920 static inline bool zone_spans_pfn(const struct zone *zone, unsigned long pfn)
921 {
922 return zone->zone_start_pfn <= pfn && pfn < zone_end_pfn(zone);
923 }
924
925 static inline bool zone_is_initialized(struct zone *zone)
926 {
927 return zone->initialized;
928 }
929
930 static inline bool zone_is_empty(struct zone *zone)
931 {
932 return zone->spanned_pages == 0;
933 }
934
935 /*
936 * Return true if [start_pfn, start_pfn + nr_pages) range has a non-empty
937 * intersection with the given zone
938 */
939 static inline bool zone_intersects(struct zone *zone,
940 unsigned long start_pfn, unsigned long nr_pages)
941 {
942 if (zone_is_empty(zone))
943 return false;
944 if (start_pfn >= zone_end_pfn(zone) ||
945 start_pfn + nr_pages <= zone->zone_start_pfn)
946 return false;
947
948 return true;
949 }
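
/*
 * Usage sketch (illustrative): checking that a hotplug-style pfn range is
 * fully contained in a zone, using only the helpers above. "start_pfn" and
 * "nr_pages" are hypothetical parameters.
 *
 *    if (zone_intersects(zone, start_pfn, nr_pages) &&
 *        zone_spans_pfn(zone, start_pfn) &&
 *        zone_spans_pfn(zone, start_pfn + nr_pages - 1))
 *        ;    // whole range lies in [zone_start_pfn, zone_end_pfn)
 */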
950
951 /*
952 * The "priority" of VM scanning is how much of the queues we will scan in one
953 * go. A value of 12 for DEF_PRIORITY implies that we will scan 1/4096th of the
954 * queues ("queue_length >> 12") during an aging round.
955 */
956 #define DEF_PRIORITY 12
957
958 /* Maximum number of zones on a zonelist */
959 #define MAX_ZONES_PER_ZONELIST (MAX_NUMNODES * MAX_NR_ZONES)
960
961 enum {
962 ZONELIST_FALLBACK, /* zonelist with fallback */
963 #ifdef CONFIG_NUMA
964 /*
965 * The NUMA zonelists are doubled because we need zonelists that
966 * restrict the allocations to a single node for __GFP_THISNODE.
967 */
968 ZONELIST_NOFALLBACK, /* zonelist without fallback (__GFP_THISNODE) */
969 #endif
970 MAX_ZONELISTS
971 };
972
973 /*
974 * This struct contains information about a zone in a zonelist. It is stored
975 * here to avoid dereferences into large structures and lookups of tables
976 */
977 struct zoneref {
978 struct zone *zone; /* Pointer to actual zone */
979 int zone_idx; /* zone_idx(zoneref->zone) */
980 };
981
982 /*
983 * One allocation request operates on a zonelist. A zonelist
984 * is a list of zones, the first one is the 'goal' of the
985 * allocation, the other zones are fallback zones, in decreasing
986 * priority.
987 *
988 * To speed the reading of the zonelist, the zonerefs contain the zone index
989 * of the entry being read. Helper functions to access information given
990 * a struct zoneref are
991 *
992 * zonelist_zone() - Return the struct zone * for an entry in _zonerefs
993 * zonelist_zone_idx() - Return the index of the zone for an entry
994 * zonelist_node_idx() - Return the index of the node for an entry
995 */
996 struct zonelist {
997 struct zoneref _zonerefs[MAX_ZONES_PER_ZONELIST + 1];
998 };
999
1000 /*
1001 * The array of struct pages for flatmem.
1002 * It must be declared for SPARSEMEM as well because there are configurations
1003 * that rely on that.
1004 */
1005 extern struct page *mem_map;
1006
1007 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
1008 struct deferred_split {
1009 spinlock_t split_queue_lock;
1010 struct list_head split_queue;
1011 unsigned long split_queue_len;
1012 };
1013 #endif
1014
1015 /*
1016 * On NUMA machines, each NUMA node would have a pg_data_t to describe
1017 * its memory layout. On UMA machines there is a single pglist_data which
1018 * describes the whole memory.
1019 *
1020 * Memory statistics and page replacement data structures are maintained on a
1021 * per-zone basis.
1022 */
1023 typedef struct pglist_data {
1024 /*
1025 * node_zones contains just the zones for THIS node. Not all of the
1026 * zones may be populated, but it is the full list. It is referenced by
1027 * this node's node_zonelists as well as other node's node_zonelists.
1028 */
1029 struct zone node_zones[MAX_NR_ZONES];
1030
1031 /*
1032 * node_zonelists contains references to all zones in all nodes.
1033 * Generally the first zones will be references to this node's
1034 * node_zones.
1035 */
1036 struct zonelist node_zonelists[MAX_ZONELISTS];
1037
1038 int nr_zones; /* number of populated zones in this node */
1039 #ifdef CONFIG_FLATMEM /* means !SPARSEMEM */
1040 struct page *node_mem_map;
1041 #ifdef CONFIG_PAGE_EXTENSION
1042 struct page_ext *node_page_ext;
1043 #endif
1044 #endif
1045 #if defined(CONFIG_MEMORY_HOTPLUG) || defined(CONFIG_DEFERRED_STRUCT_PAGE_INIT)
1046 /*
1047 * Must be held any time you expect node_start_pfn,
1048 * node_present_pages, node_spanned_pages or nr_zones to stay constant.
1049 * Also synchronizes pgdat->first_deferred_pfn during deferred page
1050 * init.
1051 *
1052 * pgdat_resize_lock() and pgdat_resize_unlock() are provided to
1053 * manipulate node_size_lock without checking for CONFIG_MEMORY_HOTPLUG
1054 * or CONFIG_DEFERRED_STRUCT_PAGE_INIT.
1055 *
1056 * Nests above zone->lock and zone->span_seqlock
1057 */
1058 spinlock_t node_size_lock;
1059 #endif
1060 unsigned long node_start_pfn;
1061 unsigned long node_present_pages; /* total number of physical pages */
1062 unsigned long node_spanned_pages; /* total size of physical page
1063 range, including holes */
1064 int node_id;
1065 wait_queue_head_t kswapd_wait;
1066 wait_queue_head_t pfmemalloc_wait;
1067 struct task_struct *kswapd; /* Protected by
1068 mem_hotplug_begin/end() */
1069 struct task_struct *mkswapd[MAX_KSWAPD_THREADS];
1070 int kswapd_order;
1071 enum zone_type kswapd_highest_zoneidx;
1072
1073 int kswapd_failures; /* Number of 'reclaimed == 0' runs */
1074
1075 ANDROID_OEM_DATA(1);
1076 #ifdef CONFIG_COMPACTION
1077 int kcompactd_max_order;
1078 enum zone_type kcompactd_highest_zoneidx;
1079 wait_queue_head_t kcompactd_wait;
1080 struct task_struct *kcompactd;
1081 bool proactive_compact_trigger;
1082 #endif
1083 /*
1084 * This is a per-node reserve of pages that are not available
1085 * to userspace allocations.
1086 */
1087 unsigned long totalreserve_pages;
1088
1089 #ifdef CONFIG_NUMA
1090 /*
1091 * node reclaim becomes active if more unmapped pages exist.
1092 */
1093 unsigned long min_unmapped_pages;
1094 unsigned long min_slab_pages;
1095 #endif /* CONFIG_NUMA */
1096
1097 /* Write-intensive fields used by page reclaim */
1098 ZONE_PADDING(_pad1_)
1099
1100 #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
1101 /*
1102 * If memory initialisation on large machines is deferred then this
1103 * is the first PFN that needs to be initialised.
1104 */
1105 unsigned long first_deferred_pfn;
1106 #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
1107
1108 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
1109 struct deferred_split deferred_split_queue;
1110 #endif
1111
1112 /* Fields commonly accessed by the page reclaim scanner */
1113
1114 /*
1115 * NOTE: THIS IS UNUSED IF MEMCG IS ENABLED.
1116 *
1117 * Use mem_cgroup_lruvec() to look up lruvecs.
1118 */
1119 struct lruvec __lruvec;
1120
1121 unsigned long flags;
1122
1123 #ifdef CONFIG_LRU_GEN
1124 /* kswap mm walk data */
1125 struct lru_gen_mm_walk mm_walk;
1126 #endif
1127
1128 ZONE_PADDING(_pad2_)
1129
1130 /* Per-node vmstats */
1131 struct per_cpu_nodestat __percpu *per_cpu_nodestats;
1132 atomic_long_t vm_stat[NR_VM_NODE_STAT_ITEMS];
1133 } pg_data_t;
1134
1135 #define node_present_pages(nid) (NODE_DATA(nid)->node_present_pages)
1136 #define node_spanned_pages(nid) (NODE_DATA(nid)->node_spanned_pages)
1137 #ifdef CONFIG_FLATMEM
1138 #define pgdat_page_nr(pgdat, pagenr) ((pgdat)->node_mem_map + (pagenr))
1139 #else
1140 #define pgdat_page_nr(pgdat, pagenr) pfn_to_page((pgdat)->node_start_pfn + (pagenr))
1141 #endif
1142 #define nid_page_nr(nid, pagenr) pgdat_page_nr(NODE_DATA(nid),(pagenr))
1143
1144 #define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn)
1145 #define node_end_pfn(nid) pgdat_end_pfn(NODE_DATA(nid))
1146
1147 static inline unsigned long pgdat_end_pfn(pg_data_t *pgdat)
1148 {
1149 return pgdat->node_start_pfn + pgdat->node_spanned_pages;
1150 }
1151
1152 static inline bool pgdat_is_empty(pg_data_t *pgdat)
1153 {
1154 return !pgdat->node_start_pfn && !pgdat->node_spanned_pages;
1155 }
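
/*
 * Usage sketch (illustrative): reporting a node's pfn span with the helpers
 * and macros above; "nid" is a hypothetical node id.
 *
 *    pg_data_t *pgdat = NODE_DATA(nid);
 *
 *    if (!pgdat_is_empty(pgdat))
 *        pr_debug("node %d spans pfns [%lu, %lu)\n", nid,
 *                 node_start_pfn(nid), pgdat_end_pfn(pgdat));
 */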
1156
1157 #include <linux/memory_hotplug.h>
1158
1159 void build_all_zonelists(pg_data_t *pgdat);
1160 void wakeup_kswapd(struct zone *zone, gfp_t gfp_mask, int order,
1161 enum zone_type highest_zoneidx);
1162 bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
1163 int highest_zoneidx, unsigned int alloc_flags,
1164 long free_pages);
1165 bool zone_watermark_ok(struct zone *z, unsigned int order,
1166 unsigned long mark, int highest_zoneidx,
1167 unsigned int alloc_flags);
1168 bool zone_watermark_ok_safe(struct zone *z, unsigned int order,
1169 unsigned long mark, int highest_zoneidx);
1170 /*
1171 * Memory initialization context, use to differentiate memory added by
1172 * the platform statically or via memory hotplug interface.
1173 */
1174 enum meminit_context {
1175 MEMINIT_EARLY,
1176 MEMINIT_HOTPLUG,
1177 };
1178
1179 extern void init_currently_empty_zone(struct zone *zone, unsigned long start_pfn,
1180 unsigned long size);
1181
1182 extern void lruvec_init(struct lruvec *lruvec);
1183
1184 static inline struct pglist_data *lruvec_pgdat(struct lruvec *lruvec)
1185 {
1186 #ifdef CONFIG_MEMCG
1187 return lruvec->pgdat;
1188 #else
1189 return container_of(lruvec, struct pglist_data, __lruvec);
1190 #endif
1191 }
1192
1193 #ifdef CONFIG_HAVE_MEMORYLESS_NODES
1194 int local_memory_node(int node_id);
1195 #else
1196 static inline int local_memory_node(int node_id) { return node_id; };
1197 #endif
1198
1199 /*
1200 * zone_idx() returns 0 for the ZONE_DMA zone, 1 for the ZONE_NORMAL zone, etc.
1201 */
1202 #define zone_idx(zone) ((zone) - (zone)->zone_pgdat->node_zones)
1203
1204 #ifdef CONFIG_ZONE_DEVICE
1205 static inline bool zone_is_zone_device(struct zone *zone)
1206 {
1207 return zone_idx(zone) == ZONE_DEVICE;
1208 }
1209 #else
1210 static inline bool zone_is_zone_device(struct zone *zone)
1211 {
1212 return false;
1213 }
1214 #endif
1215
1216 /*
1217 * Returns true if a zone has pages managed by the buddy allocator.
1218 * All the reclaim decisions have to use this function rather than
1219 * populated_zone(). If the whole zone is reserved then we can easily
1220 * end up with populated_zone() && !managed_zone().
1221 */
1222 static inline bool managed_zone(struct zone *zone)
1223 {
1224 return zone_managed_pages(zone);
1225 }
1226
1227 /* Returns true if a zone has memory */
1228 static inline bool populated_zone(struct zone *zone)
1229 {
1230 return zone->present_pages;
1231 }
1232
1233 #ifdef CONFIG_NUMA
1234 static inline int zone_to_nid(struct zone *zone)
1235 {
1236 return zone->node;
1237 }
1238
1239 static inline void zone_set_nid(struct zone *zone, int nid)
1240 {
1241 zone->node = nid;
1242 }
1243 #else
1244 static inline int zone_to_nid(struct zone *zone)
1245 {
1246 return 0;
1247 }
1248
1249 static inline void zone_set_nid(struct zone *zone, int nid) {}
1250 #endif
1251
1252 extern int movable_zone;
1253
1254 static inline int is_highmem_idx(enum zone_type idx)
1255 {
1256 #ifdef CONFIG_HIGHMEM
1257 return (idx == ZONE_HIGHMEM ||
1258 (idx == ZONE_MOVABLE && movable_zone == ZONE_HIGHMEM));
1259 #else
1260 return 0;
1261 #endif
1262 }
1263
1264 #ifdef CONFIG_ZONE_DMA
1265 bool has_managed_dma(void);
1266 #else
1267 static inline bool has_managed_dma(void)
1268 {
1269 return false;
1270 }
1271 #endif
1272
1273 /**
1274 * is_highmem - helper function to quickly check if a struct zone is a
1275 * highmem zone or not. This is an attempt to keep references
1276 * to ZONE_{DMA/NORMAL/HIGHMEM/etc} in general code to a minimum.
1277 * @zone: pointer to struct zone variable
1278 * Return: 1 for a highmem zone, 0 otherwise
1279 */
1280 static inline int is_highmem(struct zone *zone)
1281 {
1282 #ifdef CONFIG_HIGHMEM
1283 return is_highmem_idx(zone_idx(zone));
1284 #else
1285 return 0;
1286 #endif
1287 }
1288
1289 /* These two functions are used to setup the per zone pages min values */
1290 struct ctl_table;
1291
1292 int min_free_kbytes_sysctl_handler(struct ctl_table *, int, void *, size_t *,
1293 loff_t *);
1294 int watermark_scale_factor_sysctl_handler(struct ctl_table *, int, void *,
1295 size_t *, loff_t *);
1296 extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES];
1297 int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, void *,
1298 size_t *, loff_t *);
1299 int percpu_pagelist_high_fraction_sysctl_handler(struct ctl_table *, int,
1300 void *, size_t *, loff_t *);
1301 int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int,
1302 void *, size_t *, loff_t *);
1303 int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int,
1304 void *, size_t *, loff_t *);
1305 int numa_zonelist_order_handler(struct ctl_table *, int,
1306 void *, size_t *, loff_t *);
1307 extern int percpu_pagelist_high_fraction;
1308 extern char numa_zonelist_order[];
1309 #define NUMA_ZONELIST_ORDER_LEN 16
1310
1311 #ifndef CONFIG_NUMA
1312
1313 extern struct pglist_data contig_page_data;
1314 static inline struct pglist_data *NODE_DATA(int nid)
1315 {
1316 return &contig_page_data;
1317 }
1318 #define NODE_MEM_MAP(nid) mem_map
1319
1320 #else /* CONFIG_NUMA */
1321
1322 #include <asm/mmzone.h>
1323
1324 #endif /* !CONFIG_NUMA */
1325
1326 extern struct pglist_data *first_online_pgdat(void);
1327 extern struct pglist_data *next_online_pgdat(struct pglist_data *pgdat);
1328 extern struct zone *next_zone(struct zone *zone);
1329 extern int isolate_anon_lru_page(struct page *page);
1330
1331 /**
1332 * for_each_online_pgdat - helper macro to iterate over all online nodes
1333 * @pgdat: pointer to a pg_data_t variable
1334 */
1335 #define for_each_online_pgdat(pgdat) \
1336 for (pgdat = first_online_pgdat(); \
1337 pgdat; \
1338 pgdat = next_online_pgdat(pgdat))
1339 /**
1340 * for_each_zone - helper macro to iterate over all memory zones
1341 * @zone: pointer to struct zone variable
1342 *
1343 * The user only needs to declare the zone variable, for_each_zone
1344 * fills it in.
1345 */
1346 #define for_each_zone(zone) \
1347 for (zone = (first_online_pgdat())->node_zones; \
1348 zone; \
1349 zone = next_zone(zone))
1350
1351 #define for_each_populated_zone(zone) \
1352 for (zone = (first_online_pgdat())->node_zones; \
1353 zone; \
1354 zone = next_zone(zone)) \
1355 if (!populated_zone(zone)) \
1356 ; /* do nothing */ \
1357 else
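
/*
 * Usage sketch (illustrative): the iterators above only require the caller
 * to declare the cursor variable.
 *
 *    struct zone *zone;
 *
 *    for_each_populated_zone(zone)
 *        pr_debug("%s: %lu managed pages\n",
 *                 zone->name, zone_managed_pages(zone));
 */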
1358
1359 static inline struct zone *zonelist_zone(struct zoneref *zoneref)
1360 {
1361 return zoneref->zone;
1362 }
1363
1364 static inline int zonelist_zone_idx(struct zoneref *zoneref)
1365 {
1366 return zoneref->zone_idx;
1367 }
1368
1369 static inline int zonelist_node_idx(struct zoneref *zoneref)
1370 {
1371 return zone_to_nid(zoneref->zone);
1372 }
1373
1374 struct zoneref *__next_zones_zonelist(struct zoneref *z,
1375 enum zone_type highest_zoneidx,
1376 nodemask_t *nodes);
1377
1378 /**
1379 * next_zones_zonelist - Returns the next zone at or below highest_zoneidx within the allowed nodemask using a cursor within a zonelist as a starting point
1380 * @z: The cursor used as a starting point for the search
1381 * @highest_zoneidx: The zone index of the highest zone to return
1382 * @nodes: An optional nodemask to filter the zonelist with
1383 *
1384 * This function returns the next zone at or below a given zone index that is
1385 * within the allowed nodemask using a cursor as the starting point for the
1386 * search. The zoneref returned is a cursor that represents the current zone
1387 * being examined. It should be advanced by one before calling
1388 * next_zones_zonelist again.
1389 *
1390 * Return: the next zone at or below highest_zoneidx within the allowed
1391 * nodemask using a cursor within a zonelist as a starting point
1392 */
1393 static __always_inline struct zoneref *next_zones_zonelist(struct zoneref *z,
1394 enum zone_type highest_zoneidx,
1395 nodemask_t *nodes)
1396 {
1397 if (likely(!nodes && zonelist_zone_idx(z) <= highest_zoneidx))
1398 return z;
1399 return __next_zones_zonelist(z, highest_zoneidx, nodes);
1400 }
1401
1402 /**
1403 * first_zones_zonelist - Returns the first zone at or below highest_zoneidx within the allowed nodemask in a zonelist
1404 * @zonelist: The zonelist to search for a suitable zone
1405 * @highest_zoneidx: The zone index of the highest zone to return
1406 * @nodes: An optional nodemask to filter the zonelist with
1407 *
1408 * This function returns the first zone at or below a given zone index that is
1409 * within the allowed nodemask. The zoneref returned is a cursor that can be
1410 * used to iterate the zonelist with next_zones_zonelist by advancing it by
1411 * one before calling.
1412 *
1413 * When no eligible zone is found, zoneref->zone is NULL (zoneref itself is
1414 * never NULL). This may happen either genuinely, or due to concurrent nodemask
1415 * update due to cpuset modification.
1416 *
1417 * Return: Zoneref pointer for the first suitable zone found
1418 */
1419 static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist,
1420 enum zone_type highest_zoneidx,
1421 nodemask_t *nodes)
1422 {
1423 return next_zones_zonelist(zonelist->_zonerefs,
1424 highest_zoneidx, nodes);
1425 }
1426
1427 /**
1428 * for_each_zone_zonelist_nodemask - helper macro to iterate over valid zones in a zonelist at or below a given zone index and within a nodemask
1429 * @zone: The current zone in the iterator
1430 * @z: The current pointer within zonelist->_zonerefs being iterated
1431 * @zlist: The zonelist being iterated
1432 * @highidx: The zone index of the highest zone to return
1433 * @nodemask: Nodemask allowed by the allocator
1434 *
1435 * This iterator iterates though all zones at or below a given zone index and
1436 * within a given nodemask
1437 */
1438 #define for_each_zone_zonelist_nodemask(zone, z, zlist, highidx, nodemask) \
1439 for (z = first_zones_zonelist(zlist, highidx, nodemask), zone = zonelist_zone(z); \
1440 zone; \
1441 z = next_zones_zonelist(++z, highidx, nodemask), \
1442 zone = zonelist_zone(z))
1443
1444 #define for_next_zone_zonelist_nodemask(zone, z, highidx, nodemask) \
1445 for (zone = z->zone; \
1446 zone; \
1447 z = next_zones_zonelist(++z, highidx, nodemask), \
1448 zone = zonelist_zone(z))
1449
1450
1451 /**
1452 * for_each_zone_zonelist - helper macro to iterate over valid zones in a zonelist at or below a given zone index
1453 * @zone: The current zone in the iterator
1454 * @z: The current pointer within zonelist->zones being iterated
1455 * @zlist: The zonelist being iterated
1456 * @highidx: The zone index of the highest zone to return
1457 *
1458 * This iterator iterates though all zones at or below a given zone index.
1459 */
1460 #define for_each_zone_zonelist(zone, z, zlist, highidx) \
1461 for_each_zone_zonelist_nodemask(zone, z, zlist, highidx, NULL)
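
/*
 * Usage sketch (illustrative): walking a node's fallback zonelist the way
 * the allocator does, considering zones up to ZONE_NORMAL; "nid" is a
 * hypothetical node id.
 *
 *    struct zonelist *zl = &NODE_DATA(nid)->node_zonelists[ZONELIST_FALLBACK];
 *    struct zoneref *z;
 *    struct zone *zone;
 *
 *    for_each_zone_zonelist(zone, z, zl, ZONE_NORMAL)
 *        pr_debug("candidate zone %s on node %d\n",
 *                 zone->name, zone_to_nid(zone));
 */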
1462
1463 #ifdef CONFIG_SPARSEMEM
1464 #include <asm/sparsemem.h>
1465 #endif
1466
1467 #ifdef CONFIG_FLATMEM
1468 #define pfn_to_nid(pfn) (0)
1469 #endif
1470
1471 #ifdef CONFIG_SPARSEMEM
1472
1473 /*
1474 * PA_SECTION_SHIFT physical address to/from section number
1475 * PFN_SECTION_SHIFT pfn to/from section number
1476 */
1477 #define PA_SECTION_SHIFT (SECTION_SIZE_BITS)
1478 #define PFN_SECTION_SHIFT (SECTION_SIZE_BITS - PAGE_SHIFT)
1479
1480 #define NR_MEM_SECTIONS (1UL << SECTIONS_SHIFT)
1481
1482 #define PAGES_PER_SECTION (1UL << PFN_SECTION_SHIFT)
1483 #define PAGE_SECTION_MASK (~(PAGES_PER_SECTION-1))
1484
1485 #define SECTION_BLOCKFLAGS_BITS \
1486 ((1UL << (PFN_SECTION_SHIFT - pageblock_order)) * NR_PAGEBLOCK_BITS)
1487
1488 #if (MAX_ORDER - 1 + PAGE_SHIFT) > SECTION_SIZE_BITS
1489 #error Allocator MAX_ORDER exceeds SECTION_SIZE
1490 #endif
1491
1492 static inline unsigned long pfn_to_section_nr(unsigned long pfn)
1493 {
1494 return pfn >> PFN_SECTION_SHIFT;
1495 }
1496 static inline unsigned long section_nr_to_pfn(unsigned long sec)
1497 {
1498 return sec << PFN_SECTION_SHIFT;
1499 }
1500
1501 #define SECTION_ALIGN_UP(pfn) (((pfn) + PAGES_PER_SECTION - 1) & PAGE_SECTION_MASK)
1502 #define SECTION_ALIGN_DOWN(pfn) ((pfn) & PAGE_SECTION_MASK)
1503
1504 #define SUBSECTION_SHIFT 21
1505 #define SUBSECTION_SIZE (1UL << SUBSECTION_SHIFT)
1506
1507 #define PFN_SUBSECTION_SHIFT (SUBSECTION_SHIFT - PAGE_SHIFT)
1508 #define PAGES_PER_SUBSECTION (1UL << PFN_SUBSECTION_SHIFT)
1509 #define PAGE_SUBSECTION_MASK (~(PAGES_PER_SUBSECTION-1))
1510
1511 #if SUBSECTION_SHIFT > SECTION_SIZE_BITS
1512 #error Subsection size exceeds section size
1513 #else
1514 #define SUBSECTIONS_PER_SECTION (1UL << (SECTION_SIZE_BITS - SUBSECTION_SHIFT))
1515 #endif
1516
1517 #define SUBSECTION_ALIGN_UP(pfn) ALIGN((pfn), PAGES_PER_SUBSECTION)
1518 #define SUBSECTION_ALIGN_DOWN(pfn) ((pfn) & PAGE_SUBSECTION_MASK)
1519
1520 struct mem_section_usage {
1521 #ifdef CONFIG_SPARSEMEM_VMEMMAP
1522 DECLARE_BITMAP(subsection_map, SUBSECTIONS_PER_SECTION);
1523 #endif
1524 /* See declaration of similar field in struct zone */
1525 unsigned long pageblock_flags[0];
1526 };
1527
1528 void subsection_map_init(unsigned long pfn, unsigned long nr_pages);
1529
1530 struct page;
1531 struct page_ext;
1532 struct mem_section {
1533 /*
1534 * This is, logically, a pointer to an array of struct
1535 * pages. However, it is stored with some other magic.
1536 * (see sparse.c::sparse_init_one_section())
1537 *
1538 * Additionally during early boot we encode node id of
1539 * the location of the section here to guide allocation.
1540 * (see sparse.c::memory_present())
1541 *
1542 * Making it a UL at least makes someone do a cast
1543 * before using it wrong.
1544 */
1545 unsigned long section_mem_map;
1546
1547 struct mem_section_usage *usage;
1548 #ifdef CONFIG_PAGE_EXTENSION
1549 /*
1550 * If SPARSEMEM, pgdat doesn't have page_ext pointer. We use
1551 * section. (see page_ext.h about this.)
1552 */
1553 struct page_ext *page_ext;
1554 unsigned long pad;
1555 #endif
1556 /*
1557 * WARNING: mem_section must be a power-of-2 in size for the
1558 * calculation and use of SECTION_ROOT_MASK to make sense.
1559 */
1560 };
1561
1562 #ifdef CONFIG_SPARSEMEM_EXTREME
1563 #define SECTIONS_PER_ROOT (PAGE_SIZE / sizeof (struct mem_section))
1564 #else
1565 #define SECTIONS_PER_ROOT 1
1566 #endif
1567
1568 #define SECTION_NR_TO_ROOT(sec) ((sec) / SECTIONS_PER_ROOT)
1569 #define NR_SECTION_ROOTS DIV_ROUND_UP(NR_MEM_SECTIONS, SECTIONS_PER_ROOT)
1570 #define SECTION_ROOT_MASK (SECTIONS_PER_ROOT - 1)
1571
1572 #ifdef CONFIG_SPARSEMEM_EXTREME
1573 extern struct mem_section **mem_section;
1574 #else
1575 extern struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT];
1576 #endif
1577
1578 static inline unsigned long *section_to_usemap(struct mem_section *ms)
1579 {
1580 return ms->usage->pageblock_flags;
1581 }
1582
1583 static inline struct mem_section *__nr_to_section(unsigned long nr)
1584 {
1585 unsigned long root = SECTION_NR_TO_ROOT(nr);
1586
1587 if (unlikely(root >= NR_SECTION_ROOTS))
1588 return NULL;
1589
1590 #ifdef CONFIG_SPARSEMEM_EXTREME
1591 if (!mem_section || !mem_section[root])
1592 return NULL;
1593 #endif
1594 return &mem_section[root][nr & SECTION_ROOT_MASK];
1595 }
1596 extern size_t mem_section_usage_size(void);
1597
1598 /*
1599 * We use the lower bits of the mem_map pointer to store
1600 * a little bit of information. The pointer is calculated
1601 * as mem_map - section_nr_to_pfn(pnum). The result is
1602 * aligned to the minimum alignment of the two values:
1603 * 1. All mem_map arrays are page-aligned.
1604 * 2. section_nr_to_pfn() always clears PFN_SECTION_SHIFT
1605 * lowest bits. PFN_SECTION_SHIFT is arch-specific
1606 * (equal to SECTION_SIZE_BITS - PAGE_SHIFT), and the
1607 * worst combination is powerpc with 256k pages,
1608 * which results in PFN_SECTION_SHIFT equal to 6.
1609 * To sum it up, at least 6 bits are available.
1610 */
1611 #define SECTION_MARKED_PRESENT (1UL<<0)
1612 #define SECTION_HAS_MEM_MAP (1UL<<1)
1613 #define SECTION_IS_ONLINE (1UL<<2)
1614 #define SECTION_IS_EARLY (1UL<<3)
1615 #define SECTION_TAINT_ZONE_DEVICE (1UL<<4)
1616 #define SECTION_MAP_LAST_BIT (1UL<<5)
1617 #define SECTION_MAP_MASK (~(SECTION_MAP_LAST_BIT-1))
1618 #define SECTION_NID_SHIFT 6
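
/*
 * Editorial sketch (not part of the original header, name is
 * illustrative): during early boot the node id mentioned in the
 * struct mem_section comment is parked in the bits above
 * SECTION_NID_SHIFT; this mirrors sparse_encode_early_nid() /
 * sparse_early_nid() in mm/sparse.c.
 */
static inline int example_sparse_early_nid(struct mem_section *section)
{
	return section->section_mem_map >> SECTION_NID_SHIFT;
}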
1619
1620 static inline struct page *__section_mem_map_addr(struct mem_section *section)
1621 {
1622 unsigned long map = section->section_mem_map;
1623 map &= SECTION_MAP_MASK;
1624 return (struct page *)map;
1625 }
1626
1627 static inline int present_section(struct mem_section *section)
1628 {
1629 return (section && (section->section_mem_map & SECTION_MARKED_PRESENT));
1630 }
1631
1632 static inline int present_section_nr(unsigned long nr)
1633 {
1634 return present_section(__nr_to_section(nr));
1635 }
1636
1637 static inline int valid_section(struct mem_section *section)
1638 {
1639 return (section && (section->section_mem_map & SECTION_HAS_MEM_MAP));
1640 }
1641
1642 static inline int early_section(struct mem_section *section)
1643 {
1644 return (section && (section->section_mem_map & SECTION_IS_EARLY));
1645 }
1646
1647 static inline int valid_section_nr(unsigned long nr)
1648 {
1649 return valid_section(__nr_to_section(nr));
1650 }
1651
1652 static inline int online_section(struct mem_section *section)
1653 {
1654 return (section && (section->section_mem_map & SECTION_IS_ONLINE));
1655 }
1656
1657 static inline int online_device_section(struct mem_section *section)
1658 {
1659 unsigned long flags = SECTION_IS_ONLINE | SECTION_TAINT_ZONE_DEVICE;
1660
1661 return section && ((section->section_mem_map & flags) == flags);
1662 }
1663
1664 static inline int online_section_nr(unsigned long nr)
1665 {
1666 return online_section(__nr_to_section(nr));
1667 }
1668
1669 #ifdef CONFIG_MEMORY_HOTPLUG
1670 void online_mem_sections(unsigned long start_pfn, unsigned long end_pfn);
1671 void offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn);
1672 #endif
1673
1674 static inline struct mem_section *__pfn_to_section(unsigned long pfn)
1675 {
1676 return __nr_to_section(pfn_to_section_nr(pfn));
1677 }
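
/*
 * Editorial sketch (not part of the original header, name is
 * illustrative): on the classic (non-vmemmap) SPARSEMEM memory model,
 * pfn_to_page() is built from the helpers above.  Because
 * section_mem_map stores "mem_map - section_nr_to_pfn(pnum)" (see the
 * comment before the SECTION_* flag definitions), adding the full pfn
 * yields the right struct page; the real lookup lives in
 * asm-generic/memory_model.h.
 */
static inline struct page *example_sparse_pfn_to_page(unsigned long pfn)
{
	struct mem_section *ms = __pfn_to_section(pfn);

	return __section_mem_map_addr(ms) + pfn;
}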
1678
1679 extern unsigned long __highest_present_section_nr;
1680
1681 static inline int subsection_map_index(unsigned long pfn)
1682 {
1683 return (pfn & ~(PAGE_SECTION_MASK)) / PAGES_PER_SUBSECTION;
1684 }
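
/*
 * Editorial worked example (not part of the original header): with 4 KiB
 * pages, a pfn that sits 3 MiB into its section has an in-section offset
 * of 768 pages, giving subsection_map_index() == 768 / 512 == 1.
 */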
1685
1686 #ifdef CONFIG_SPARSEMEM_VMEMMAP
1687 static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn)
1688 {
1689 int idx = subsection_map_index(pfn);
1690
1691 return test_bit(idx, ms->usage->subsection_map);
1692 }
1693 #else
1694 static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn)
1695 {
1696 return 1;
1697 }
1698 #endif
1699
1700 #ifndef CONFIG_HAVE_ARCH_PFN_VALID
1701 /**
1702 * pfn_valid - check if there is a valid memory map entry for a PFN
1703 * @pfn: the page frame number to check
1704 *
1705 * Check if there is a valid memory map entry aka struct page for the @pfn.
1706 * Note, that availability of the memory map entry does not imply that
1707 * there is actual usable memory at that @pfn. The struct page may
1708 * represent a hole or an unusable page frame.
1709 *
1710 * Return: 1 for PFNs that have memory map entries and 0 otherwise
1711 */
1712 static inline int pfn_valid(unsigned long pfn)
1713 {
1714 struct mem_section *ms;
1715
1716 /*
1717 * Ensure the upper PAGE_SHIFT bits are clear in the
1718 * pfn. Else it might lead to false positives when
1719 * some of the upper bits are set, but the lower bits
1720 * match a valid pfn.
1721 */
1722 if (PHYS_PFN(PFN_PHYS(pfn)) != pfn)
1723 return 0;
1724
1725 if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS)
1726 return 0;
1727 ms = __pfn_to_section(pfn);
1728 if (!valid_section(ms))
1729 return 0;
1730 /*
1731 * Traditionally early sections always returned pfn_valid() for
1732 * the entire section-sized span.
1733 */
1734 return early_section(ms) || pfn_section_valid(ms, pfn);
1735 }
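
/*
 * Editorial sketch (not part of the original header, name is
 * illustrative): pfn_valid() only promises that a struct page exists,
 * not that the frame is usable RAM, so callers typically combine it
 * with further checks on the page itself.  A minimal scan of a pfn
 * range could look like this.
 */
static inline unsigned long example_count_valid_pfns(unsigned long start_pfn,
						     unsigned long end_pfn)
{
	unsigned long pfn, nr_valid = 0;

	for (pfn = start_pfn; pfn < end_pfn; pfn++)
		if (pfn_valid(pfn))
			nr_valid++;

	return nr_valid;
}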
1736 #endif
1737
1738 static inline int pfn_in_present_section(unsigned long pfn)
1739 {
1740 if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS)
1741 return 0;
1742 return present_section(__pfn_to_section(pfn));
1743 }
1744
1745 static inline unsigned long next_present_section_nr(unsigned long section_nr)
1746 {
1747 while (++section_nr <= __highest_present_section_nr) {
1748 if (present_section_nr(section_nr))
1749 return section_nr;
1750 }
1751
1752 return -1;
1753 }
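
/*
 * Editorial sketch (not part of the original header, name is
 * illustrative): walking every present section, modelled on the
 * for_each_present_section_nr() iterator used in mm/sparse.c.  The
 * (unsigned long)-1 sentinel matches the return value above.
 */
static inline unsigned long example_count_present_sections(void)
{
	unsigned long section_nr, nr_present = 0;

	for (section_nr = next_present_section_nr((unsigned long)-1);
	     section_nr != (unsigned long)-1;
	     section_nr = next_present_section_nr(section_nr))
		nr_present++;

	return nr_present;
}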
1754
1755 /*
1756 * These are _only_ used during initialisation, therefore they
1757 * can use __initdata ... They could have names to indicate
1758 * this restriction.
1759 */
1760 #ifdef CONFIG_NUMA
1761 #define pfn_to_nid(pfn) \
1762 ({ \
1763 unsigned long __pfn_to_nid_pfn = (pfn); \
1764 page_to_nid(pfn_to_page(__pfn_to_nid_pfn)); \
1765 })
1766 #else
1767 #define pfn_to_nid(pfn) (0)
1768 #endif
1769
1770 void sparse_init(void);
1771 #else
1772 #define sparse_init() do {} while (0)
1773 #define sparse_index_init(_sec, _nid) do {} while (0)
1774 #define pfn_in_present_section pfn_valid
1775 #define subsection_map_init(_pfn, _nr_pages) do {} while (0)
1776 #endif /* CONFIG_SPARSEMEM */
1777
1778 #endif /* !__GENERATING_BOUNDS_H */
1779 #endif /* !__ASSEMBLY__ */
1780 #endif /* _LINUX_MMZONE_H */
1781