1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Procedures for maintaining information about logical memory blocks.
4 *
5 * Peter Bergner, IBM Corp. June 2001.
6 * Copyright (C) 2001 Peter Bergner.
7 */
8
9 #include <linux/kernel.h>
10 #include <linux/slab.h>
11 #include <linux/init.h>
12 #include <linux/bitops.h>
13 #include <linux/poison.h>
14 #include <linux/pfn.h>
15 #include <linux/debugfs.h>
16 #include <linux/kmemleak.h>
17 #include <linux/seq_file.h>
18 #include <linux/memblock.h>
19
20 #include <asm/sections.h>
21 #include <linux/io.h>
22 #include <linux/sort.h>
23 #include <linux/proc_fs.h>
24
25 #include "internal.h"
26
27 #define INIT_MEMBLOCK_REGIONS 128
28 #define INIT_PHYSMEM_REGIONS 4
29
30 #ifndef INIT_MEMBLOCK_RESERVED_REGIONS
31 # define INIT_MEMBLOCK_RESERVED_REGIONS INIT_MEMBLOCK_REGIONS
32 #endif
33
34 #ifndef INIT_MEMBLOCK_MEMORY_REGIONS
35 #define INIT_MEMBLOCK_MEMORY_REGIONS INIT_MEMBLOCK_REGIONS
36 #endif
37
38 /**
39 * DOC: memblock overview
40 *
41 * Memblock is a method of managing memory regions during the early
42 * boot period when the usual kernel memory allocators are not up and
43 * running.
44 *
45 * Memblock views the system memory as collections of contiguous
46 * regions. There are several types of these collections:
47 *
48 * * ``memory`` - describes the physical memory available to the
49 * kernel; this may differ from the actual physical memory installed
50 * in the system, for instance when the memory is restricted with
51 * ``mem=`` command line parameter
52 * * ``reserved`` - describes the regions that were allocated
53 * * ``physmem`` - describes the actual physical memory available during
54 * boot regardless of the possible restrictions and memory hot(un)plug;
55 * the ``physmem`` type is only available on some architectures.
56 *
57 * Each region is represented by struct memblock_region that
58 * defines the region extents, its attributes and NUMA node id on NUMA
59 * systems. Every memory type is described by the struct memblock_type
60 * which contains an array of memory regions along with
61 * the allocator metadata. The "memory" and "reserved" types are nicely
62 * wrapped with struct memblock. This structure is statically
63 * initialized at build time. The region arrays are initially sized to
64 * %INIT_MEMBLOCK_MEMORY_REGIONS for "memory" and
65 * %INIT_MEMBLOCK_RESERVED_REGIONS for "reserved". The region array
66 * for "physmem" is initially sized to %INIT_PHYSMEM_REGIONS.
67 * The memblock_allow_resize() enables automatic resizing of the region
68 * arrays during addition of new regions. This feature should be used
69 * with care so that memory allocated for the region array will not
70 * overlap with areas that should be reserved, for example initrd.
71 *
72 * The early architecture setup should tell memblock what the physical
73 * memory layout is by using memblock_add() or memblock_add_node()
74 * functions. The first function does not assign the region to a NUMA
75 * node and it is appropriate for UMA systems. Yet, it is possible to
76 * use it on NUMA systems as well and assign the region to a NUMA node
77 * later in the setup process using memblock_set_node(). The
78 * memblock_add_node() performs such an assignment directly.
79 *
80 * Once memblock is setup the memory can be allocated using one of the
81 * API variants:
82 *
83 * * memblock_phys_alloc*() - these functions return the **physical**
84 * address of the allocated memory
85 * * memblock_alloc*() - these functions return the **virtual** address
86 * of the allocated memory.
87 *
88 * Note that both API variants use implicit assumptions about allowed
89 * memory ranges and the fallback methods. Consult the documentation
90 * of memblock_alloc_internal() and memblock_alloc_range_nid()
91 * functions for a more elaborate description.
92 *
93 * As the system boot progresses, the architecture specific mem_init()
94 * function frees all the memory to the buddy page allocator.
95 *
96 * Unless an architecture enables %CONFIG_ARCH_KEEP_MEMBLOCK, the
97 * memblock data structures (except "physmem") will be discarded after the
98 * system initialization completes.
99 */
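
/*
 * Illustrative sketch (not part of the original source): a typical early
 * boot sequence first describes the physical memory layout to memblock and
 * then allocates from it. All addresses, sizes and variable names below
 * are hypothetical, chosen only for the example.
 *
 *	memblock_add(0x80000000, SZ_1G);		// RAM reported by firmware
 *	memblock_reserve(initrd_start, initrd_size);	// keep the initrd intact
 *	void *table = memblock_alloc(SZ_4K, SZ_4K);	// early, zeroed allocation
 *
 * Later, memblock_free_all() hands the remaining free memory over to the
 * buddy allocator.
 */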
100
101 #ifndef CONFIG_NUMA
102 struct pglist_data __refdata contig_page_data;
103 EXPORT_SYMBOL(contig_page_data);
104 #endif
105
106 unsigned long max_low_pfn;
107 unsigned long min_low_pfn;
108 unsigned long max_pfn;
109 unsigned long long max_possible_pfn;
110
111 static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_MEMORY_REGIONS] __initdata_memblock;
112 static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_RESERVED_REGIONS] __initdata_memblock;
113 #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
114 static struct memblock_region memblock_physmem_init_regions[INIT_PHYSMEM_REGIONS];
115 #endif
116
117 struct memblock memblock __initdata_memblock = {
118 .memory.regions = memblock_memory_init_regions,
119 .memory.max = INIT_MEMBLOCK_MEMORY_REGIONS,
120 .memory.name = "memory",
121
122 .reserved.regions = memblock_reserved_init_regions,
123 .reserved.max = INIT_MEMBLOCK_RESERVED_REGIONS,
124 .reserved.name = "reserved",
125
126 .bottom_up = false,
127 .current_limit = MEMBLOCK_ALLOC_ANYWHERE,
128 };
129
130 #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
131 struct memblock_type physmem = {
132 .regions = memblock_physmem_init_regions,
133 .max = INIT_PHYSMEM_REGIONS,
134 .name = "physmem",
135 };
136 #endif
137
138 static long memsize_kinit;
139 static bool memblock_memsize_tracking __initdata_memblock = true;
140
141 /*
142 * keep a pointer to &memblock.memory in the text section to use it in
143 * __next_mem_range() and its helpers.
144 * For architectures that do not keep memblock data after init, this
145 * pointer will be reset to NULL at memblock_discard()
146 */
147 static __refdata struct memblock_type *memblock_memory = &memblock.memory;
148
149 #define for_each_memblock_type(i, memblock_type, rgn) \
150 for (i = 0, rgn = &memblock_type->regions[0]; \
151 i < memblock_type->cnt; \
152 i++, rgn = &memblock_type->regions[i])
153
154 #define memblock_dbg(fmt, ...) \
155 do { \
156 if (memblock_debug) \
157 pr_info(fmt, ##__VA_ARGS__); \
158 } while (0)
159
160 static int memblock_debug __initdata_memblock;
161 static bool system_has_some_mirror __initdata_memblock;
162 static int memblock_can_resize __initdata_memblock;
163 static int memblock_memory_in_slab __initdata_memblock;
164 static int memblock_reserved_in_slab __initdata_memblock;
165
166 bool __init_memblock memblock_has_mirror(void)
167 {
168 return system_has_some_mirror;
169 }
170
171 static enum memblock_flags __init_memblock choose_memblock_flags(void)
172 {
173 return system_has_some_mirror ? MEMBLOCK_MIRROR : MEMBLOCK_NONE;
174 }
175
176 /* adjust *@size so that (@base + *@size) doesn't overflow, return new size */
177 static inline phys_addr_t memblock_cap_size(phys_addr_t base, phys_addr_t *size)
178 {
179 return *size = min(*size, PHYS_ADDR_MAX - base);
180 }
181
182 /*
183 * Address comparison utilities
184 */
185 unsigned long __init_memblock
186 memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1, phys_addr_t base2,
187 phys_addr_t size2)
188 {
189 return ((base1 < (base2 + size2)) && (base2 < (base1 + size1)));
190 }
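
/*
 * Worked example of the half-open overlap test above (made-up numbers):
 * [0x1000, 0x2000) and [0x1800, 0x2800) overlap because 0x1000 < 0x2800
 * and 0x1800 < 0x2000, while [0x1000, 0x2000) and [0x2000, 0x3000) do
 * not, since base2 < base1 + size1 fails when the ranges merely touch.
 */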
191
192 bool __init_memblock memblock_overlaps_region(struct memblock_type *type,
193 phys_addr_t base, phys_addr_t size)
194 {
195 unsigned long i;
196
197 memblock_cap_size(base, &size);
198
199 for (i = 0; i < type->cnt; i++)
200 if (memblock_addrs_overlap(base, size, type->regions[i].base,
201 type->regions[i].size))
202 return true;
203 return false;
204 }
205
206 /**
207 * __memblock_find_range_bottom_up - find free area utility in bottom-up
208 * @start: start of candidate range
209 * @end: end of candidate range, can be %MEMBLOCK_ALLOC_ANYWHERE or
210 * %MEMBLOCK_ALLOC_ACCESSIBLE
211 * @size: size of free area to find
212 * @align: alignment of free area to find
213 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
214 * @flags: pick from blocks based on memory attributes
215 *
216 * Utility called from memblock_find_in_range_node(), finds a free area bottom-up.
217 *
218 * Return:
219 * Found address on success, 0 on failure.
220 */
221 static phys_addr_t __init_memblock
222 __memblock_find_range_bottom_up(phys_addr_t start, phys_addr_t end,
223 phys_addr_t size, phys_addr_t align, int nid,
224 enum memblock_flags flags)
225 {
226 phys_addr_t this_start, this_end, cand;
227 u64 i;
228
229 for_each_free_mem_range(i, nid, flags, &this_start, &this_end, NULL) {
230 this_start = clamp(this_start, start, end);
231 this_end = clamp(this_end, start, end);
232
233 cand = round_up(this_start, align);
234 if (cand < this_end && this_end - cand >= size)
235 return cand;
236 }
237
238 return 0;
239 }
240
241 /**
242 * __memblock_find_range_top_down - find free area utility, in top-down
243 * @start: start of candidate range
244 * @end: end of candidate range, can be %MEMBLOCK_ALLOC_ANYWHERE or
245 * %MEMBLOCK_ALLOC_ACCESSIBLE
246 * @size: size of free area to find
247 * @align: alignment of free area to find
248 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
249 * @flags: pick from blocks based on memory attributes
250 *
251 * Utility called from memblock_find_in_range_node(), finds a free area top-down.
252 *
253 * Return:
254 * Found address on success, 0 on failure.
255 */
256 static phys_addr_t __init_memblock
257 __memblock_find_range_top_down(phys_addr_t start, phys_addr_t end,
258 phys_addr_t size, phys_addr_t align, int nid,
259 enum memblock_flags flags)
260 {
261 phys_addr_t this_start, this_end, cand;
262 u64 i;
263
264 for_each_free_mem_range_reverse(i, nid, flags, &this_start, &this_end,
265 NULL) {
266 this_start = clamp(this_start, start, end);
267 this_end = clamp(this_end, start, end);
268
269 if (this_end < size)
270 continue;
271
272 cand = round_down(this_end - size, align);
273 if (cand >= this_start)
274 return cand;
275 }
276
277 return 0;
278 }
279
280 /**
281 * memblock_find_in_range_node - find free area in given range and node
282 * @size: size of free area to find
283 * @align: alignment of free area to find
284 * @start: start of candidate range
285 * @end: end of candidate range, can be %MEMBLOCK_ALLOC_ANYWHERE or
286 * %MEMBLOCK_ALLOC_ACCESSIBLE
287 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
288 * @flags: pick from blocks based on memory attributes
289 *
290 * Find @size free area aligned to @align in the specified range and node.
291 *
292 * Return:
293 * Found address on success, 0 on failure.
294 */
295 static phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t size,
296 phys_addr_t align, phys_addr_t start,
297 phys_addr_t end, int nid,
298 enum memblock_flags flags)
299 {
300 /* pump up @end */
301 if (end == MEMBLOCK_ALLOC_ACCESSIBLE ||
302 end == MEMBLOCK_ALLOC_NOLEAKTRACE)
303 end = memblock.current_limit;
304
305 /* avoid allocating the first page */
306 start = max_t(phys_addr_t, start, PAGE_SIZE);
307 end = max(start, end);
308
309 if (memblock_bottom_up())
310 return __memblock_find_range_bottom_up(start, end, size, align,
311 nid, flags);
312 else
313 return __memblock_find_range_top_down(start, end, size, align,
314 nid, flags);
315 }
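
/*
 * A minimal sketch of the direction choice above: with the default
 * top-down policy the candidate closest to @end is returned, while an
 * architecture that calls memblock_set_bottom_up(true) during early setup
 * makes this function return the lowest suitable address instead.
 */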
316
317 /**
318 * memblock_find_in_range - find free area in given range
319 * @start: start of candidate range
320 * @end: end of candidate range, can be %MEMBLOCK_ALLOC_ANYWHERE or
321 * %MEMBLOCK_ALLOC_ACCESSIBLE
322 * @size: size of free area to find
323 * @align: alignment of free area to find
324 *
325 * Find @size free area aligned to @align in the specified range.
326 *
327 * Return:
328 * Found address on success, 0 on failure.
329 */
330 static phys_addr_t __init_memblock memblock_find_in_range(phys_addr_t start,
331 phys_addr_t end, phys_addr_t size,
332 phys_addr_t align)
333 {
334 phys_addr_t ret;
335 enum memblock_flags flags = choose_memblock_flags();
336
337 again:
338 ret = memblock_find_in_range_node(size, align, start, end,
339 NUMA_NO_NODE, flags);
340
341 if (!ret && (flags & MEMBLOCK_MIRROR)) {
342 pr_warn_ratelimited("Could not allocate %pap bytes of mirrored memory\n",
343 &size);
344 flags &= ~MEMBLOCK_MIRROR;
345 goto again;
346 }
347
348 return ret;
349 }
350
351 static void __init_memblock memblock_remove_region(struct memblock_type *type, unsigned long r)
352 {
353 type->total_size -= type->regions[r].size;
354 memmove(&type->regions[r], &type->regions[r + 1],
355 (type->cnt - (r + 1)) * sizeof(type->regions[r]));
356 type->cnt--;
357
358 /* Special case for empty arrays */
359 if (type->cnt == 0) {
360 WARN_ON(type->total_size != 0);
361 type->regions[0].base = 0;
362 type->regions[0].size = 0;
363 type->regions[0].flags = 0;
364 memblock_set_region_node(&type->regions[0], MAX_NUMNODES);
365 }
366 }
367
368 #ifndef CONFIG_ARCH_KEEP_MEMBLOCK
369 /**
370 * memblock_discard - discard memory and reserved arrays if they were allocated
371 */
372 void __init memblock_discard(void)
373 {
374 phys_addr_t addr, size;
375
376 if (memblock.reserved.regions != memblock_reserved_init_regions) {
377 addr = __pa(memblock.reserved.regions);
378 size = PAGE_ALIGN(sizeof(struct memblock_region) *
379 memblock.reserved.max);
380 if (memblock_reserved_in_slab)
381 kfree(memblock.reserved.regions);
382 else
383 memblock_free_late(addr, size);
384 }
385
386 if (memblock.memory.regions != memblock_memory_init_regions) {
387 addr = __pa(memblock.memory.regions);
388 size = PAGE_ALIGN(sizeof(struct memblock_region) *
389 memblock.memory.max);
390 if (memblock_memory_in_slab)
391 kfree(memblock.memory.regions);
392 else
393 memblock_free_late(addr, size);
394 }
395
396 memblock_memory = NULL;
397 }
398 #endif
399
400 /**
401 * memblock_double_array - double the size of the memblock regions array
402 * @type: memblock type of the regions array being doubled
403 * @new_area_start: starting address of memory range to avoid overlap with
404 * @new_area_size: size of memory range to avoid overlap with
405 *
406 * Double the size of the @type regions array. If memblock is being used to
407 * allocate memory for a new reserved regions array and there is a previously
408 * allocated memory range [@new_area_start, @new_area_start + @new_area_size]
409 * waiting to be reserved, ensure the memory used by the new array does
410 * not overlap.
411 *
412 * Return:
413 * 0 on success, -1 on failure.
414 */
415 static int __init_memblock memblock_double_array(struct memblock_type *type,
416 phys_addr_t new_area_start,
417 phys_addr_t new_area_size)
418 {
419 struct memblock_region *new_array, *old_array;
420 phys_addr_t old_alloc_size, new_alloc_size;
421 phys_addr_t old_size, new_size, addr, new_end;
422 int use_slab = slab_is_available();
423 int *in_slab;
424
425 /* We don't allow resizing until we know about the reserved regions
426 * of memory that aren't suitable for allocation
427 */
428 if (!memblock_can_resize)
429 panic("memblock: cannot resize %s array\n", type->name);
430
431 /* Calculate new doubled size */
432 old_size = type->max * sizeof(struct memblock_region);
433 new_size = old_size << 1;
434 /*
435 * We need to allocate the new one aligned to PAGE_SIZE,
436 * so we can free it completely later.
437 */
438 old_alloc_size = PAGE_ALIGN(old_size);
439 new_alloc_size = PAGE_ALIGN(new_size);
440
441 /* Retrieve the slab flag */
442 if (type == &memblock.memory)
443 in_slab = &memblock_memory_in_slab;
444 else
445 in_slab = &memblock_reserved_in_slab;
446
447 /* Try to find some space for it */
448 if (use_slab) {
449 new_array = kmalloc(new_size, GFP_KERNEL);
450 addr = new_array ? __pa(new_array) : 0;
451 } else {
452 /* only exclude range when trying to double reserved.regions */
453 if (type != &memblock.reserved)
454 new_area_start = new_area_size = 0;
455
456 addr = memblock_find_in_range(new_area_start + new_area_size,
457 memblock.current_limit,
458 new_alloc_size, PAGE_SIZE);
459 if (!addr && new_area_size)
460 addr = memblock_find_in_range(0,
461 min(new_area_start, memblock.current_limit),
462 new_alloc_size, PAGE_SIZE);
463
464 if (addr) {
465 /* The memory may not have been accepted, yet. */
466 accept_memory(addr, new_alloc_size);
467
468 new_array = __va(addr);
469 } else {
470 new_array = NULL;
471 }
472 }
473 if (!addr) {
474 pr_err("memblock: Failed to double %s array from %ld to %ld entries !\n",
475 type->name, type->max, type->max * 2);
476 return -1;
477 }
478
479 new_end = addr + new_size - 1;
480 memblock_dbg("memblock: %s is doubled to %ld at [%pa-%pa]",
481 type->name, type->max * 2, &addr, &new_end);
482
483 /*
484 * Found space, we now need to move the array over before we add the
485 * reserved region since it may be our reserved array itself that is
486 * full.
487 */
488 memcpy(new_array, type->regions, old_size);
489 memset(new_array + type->max, 0, old_size);
490 old_array = type->regions;
491 type->regions = new_array;
492 type->max <<= 1;
493
494 /* Free old array. We needn't free it if the array is the static one */
495 if (*in_slab)
496 kfree(old_array);
497 else if (old_array != memblock_memory_init_regions &&
498 old_array != memblock_reserved_init_regions)
499 memblock_free(old_array, old_alloc_size);
500
501 /*
502 * Reserve the new array if that comes from the memblock. Otherwise, we
503 * needn't do it
504 */
505 if (!use_slab)
506 BUG_ON(memblock_reserve(addr, new_alloc_size));
507
508 /* Update slab flag */
509 *in_slab = use_slab;
510
511 return 0;
512 }
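
/*
 * Worked example with illustrative numbers: assuming a 24-byte
 * struct memblock_region and type->max == 128, old_size is 3072 bytes and
 * new_size is 6144 bytes; old_alloc_size rounds up to one 4 KiB page and
 * new_alloc_size to two pages, so a non-slab array can later be handed
 * back as whole pages by memblock_free_late().
 */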
513
514 /**
515 * memblock_merge_regions - merge neighboring compatible regions
516 * @type: memblock type to scan
517 * @start_rgn: start scanning from (@start_rgn - 1)
518 * @end_rgn: end scanning at (@end_rgn - 1)
519 * Scan @type and merge neighboring compatible regions in [@start_rgn - 1, @end_rgn)
520 */
521 static void __init_memblock memblock_merge_regions(struct memblock_type *type,
522 unsigned long start_rgn,
523 unsigned long end_rgn)
524 {
525 int i = 0;
526 if (start_rgn)
527 i = start_rgn - 1;
528 end_rgn = min(end_rgn, type->cnt - 1);
529 while (i < end_rgn) {
530 struct memblock_region *this = &type->regions[i];
531 struct memblock_region *next = &type->regions[i + 1];
532
533 if (this->base + this->size != next->base ||
534 memblock_get_region_node(this) !=
535 memblock_get_region_node(next) ||
536 this->flags != next->flags) {
537 BUG_ON(this->base + this->size > next->base);
538 i++;
539 continue;
540 }
541
542 this->size += next->size;
543 /* move forward from next + 1, index of which is i + 2 */
544 memmove(next, next + 1, (type->cnt - (i + 2)) * sizeof(*next));
545 type->cnt--;
546 end_rgn--;
547 }
548 }
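
/*
 * Sketch of the merge rule above (hypothetical regions): [0x1000, 0x2000)
 * and [0x2000, 0x3000) with the same node id and flags collapse into a
 * single [0x1000, 0x3000) entry and type->cnt drops by one; if only one of
 * them were marked MEMBLOCK_MIRROR they would be left as two regions.
 */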
549
550 /**
551 * memblock_insert_region - insert new memblock region
552 * @type: memblock type to insert into
553 * @idx: index for the insertion point
554 * @base: base address of the new region
555 * @size: size of the new region
556 * @nid: node id of the new region
557 * @flags: flags of the new region
558 *
559 * Insert new memblock region [@base, @base + @size) into @type at @idx.
560 * @type must already have extra room to accommodate the new region.
561 */
562 static void __init_memblock memblock_insert_region(struct memblock_type *type,
563 int idx, phys_addr_t base,
564 phys_addr_t size,
565 int nid,
566 enum memblock_flags flags)
567 {
568 struct memblock_region *rgn = &type->regions[idx];
569
570 BUG_ON(type->cnt >= type->max);
571 memmove(rgn + 1, rgn, (type->cnt - idx) * sizeof(*rgn));
572 rgn->base = base;
573 rgn->size = size;
574 rgn->flags = flags;
575 memblock_set_region_node(rgn, nid);
576 type->cnt++;
577 type->total_size += size;
578 }
579
580 /**
581 * memblock_add_range - add new memblock region
582 * @type: memblock type to add new region into
583 * @base: base address of the new region
584 * @size: size of the new region
585 * @nid: nid of the new region
586 * @flags: flags of the new region
587 *
588 * Add new memblock region [@base, @base + @size) into @type. The new region
589 * is allowed to overlap with existing ones - overlaps don't affect already
590 * existing regions. @type is guaranteed to be minimal (all neighbouring
591 * compatible regions are merged) after the addition.
592 *
593 * Return:
594 * 0 on success, -errno on failure.
595 */
596 static int __init_memblock memblock_add_range(struct memblock_type *type,
597 phys_addr_t base, phys_addr_t size,
598 int nid, enum memblock_flags flags)
599 {
600 bool insert = false;
601 phys_addr_t obase = base;
602 phys_addr_t end = base + memblock_cap_size(base, &size);
603 int idx, nr_new, start_rgn = -1, end_rgn;
604 struct memblock_region *rgn;
605 phys_addr_t new_size = 0;
606
607 if (!size)
608 return 0;
609
610 /* special case for empty array */
611 if (type->regions[0].size == 0) {
612 WARN_ON(type->cnt != 0 || type->total_size);
613 type->regions[0].base = base;
614 type->regions[0].size = size;
615 type->regions[0].flags = flags;
616 memblock_set_region_node(&type->regions[0], nid);
617 type->total_size = size;
618 type->cnt = 1;
619 new_size = size;
620 goto done;
621 }
622
623 /*
624 * The worst case is when new range overlaps all existing regions,
625 * then we'll need type->cnt + 1 empty regions in @type. So if
626 * type->cnt * 2 + 1 is less than or equal to type->max, we know
627 * that there is enough empty regions in @type, and we can insert
628 * regions directly.
629 */
630 if (type->cnt * 2 + 1 <= type->max)
631 insert = true;
632
633 repeat:
634 /*
635 * The following is executed twice. Once with %false @insert and
636 * then with %true. The first counts the number of regions needed
637 * to accommodate the new area. The second actually inserts them.
638 */
639 base = obase;
640 nr_new = 0;
641
642 for_each_memblock_type(idx, type, rgn) {
643 phys_addr_t rbase = rgn->base;
644 phys_addr_t rend = rbase + rgn->size;
645
646 if (rbase >= end)
647 break;
648 if (rend <= base)
649 continue;
650 /*
651 * @rgn overlaps. If it separates the lower part of new
652 * area, insert that portion.
653 */
654 if (rbase > base) {
655 #ifdef CONFIG_NUMA
656 WARN_ON(nid != memblock_get_region_node(rgn));
657 #endif
658 WARN_ON(flags != rgn->flags);
659 nr_new++;
660 if (insert) {
661 if (start_rgn == -1)
662 start_rgn = idx;
663 end_rgn = idx + 1;
664 memblock_insert_region(type, idx++, base,
665 rbase - base, nid,
666 flags);
667 new_size += rbase - base;
668 }
669 }
670 /* area below @rend is dealt with, forget about it */
671 base = min(rend, end);
672 }
673
674 /* insert the remaining portion */
675 if (base < end) {
676 nr_new++;
677 if (insert) {
678 if (start_rgn == -1)
679 start_rgn = idx;
680 end_rgn = idx + 1;
681 memblock_insert_region(type, idx, base, end - base,
682 nid, flags);
683 new_size += end - base;
684 }
685 }
686
687 if (!nr_new)
688 return 0;
689
690 /*
691 * If this was the first round, resize array and repeat for actual
692 * insertions; otherwise, merge and return.
693 */
694 if (!insert) {
695 while (type->cnt + nr_new > type->max)
696 if (memblock_double_array(type, obase, size) < 0)
697 return -ENOMEM;
698 insert = true;
699 goto repeat;
700 } else {
701 memblock_merge_regions(type, start_rgn, end_rgn);
702 }
703 done:
704 if (memblock_memsize_tracking) {
705 if (new_size && type == &memblock.reserved) {
706 memblock_dbg("%s: kernel %lu %+ld\n", __func__,
707 memsize_kinit, (unsigned long)new_size);
708 memsize_kinit += size;
709 }
710 }
711 return 0;
712 }
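
/*
 * Illustrative walk-through of the two-pass scheme above: adding
 * [0x1000, 0x5000) to a type that already holds [0x2000, 0x3000) first
 * counts two missing pieces, [0x1000, 0x2000) and [0x3000, 0x5000),
 * doubles the array if there is not enough room, inserts both pieces on
 * the second pass and finally merges the three adjacent compatible
 * regions into one [0x1000, 0x5000) entry.
 */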
713
714 /**
715 * memblock_add_node - add new memblock region within a NUMA node
716 * @base: base address of the new region
717 * @size: size of the new region
718 * @nid: nid of the new region
719 * @flags: flags of the new region
720 *
721 * Add new memblock region [@base, @base + @size) to the "memory"
722 * type. See memblock_add_range() description for more details.
723 *
724 * Return:
725 * 0 on success, -errno on failure.
726 */
727 int __init_memblock memblock_add_node(phys_addr_t base, phys_addr_t size,
728 int nid, enum memblock_flags flags)
729 {
730 phys_addr_t end = base + size - 1;
731
732 memblock_dbg("%s: [%pa-%pa] nid=%d flags=%x %pS\n", __func__,
733 &base, &end, nid, flags, (void *)_RET_IP_);
734
735 return memblock_add_range(&memblock.memory, base, size, nid, flags);
736 }
737
738 /**
739 * memblock_add - add new memblock region
740 * @base: base address of the new region
741 * @size: size of the new region
742 *
743 * Add new memblock region [@base, @base + @size) to the "memory"
744 * type. See memblock_add_range() description for more details.
745 *
746 * Return:
747 * 0 on success, -errno on failure.
748 */
749 int __init_memblock memblock_add(phys_addr_t base, phys_addr_t size)
750 {
751 phys_addr_t end = base + size - 1;
752
753 memblock_dbg("%s: [%pa-%pa] %pS\n", __func__,
754 &base, &end, (void *)_RET_IP_);
755
756 return memblock_add_range(&memblock.memory, base, size, MAX_NUMNODES, 0);
757 }
758
759 /**
760 * memblock_validate_numa_coverage - check if the amount of memory with
761 * no node ID assigned is less than a threshold
762 * @threshold_bytes: maximum memory size that may have an unassigned node
763 * ID (in bytes).
764 *
765 * A buggy firmware may report memory that does not belong to any node.
766 * Check if the amount of such memory is below @threshold_bytes.
767 *
768 * Return: true on success, false on failure.
769 */
770 bool __init_memblock memblock_validate_numa_coverage(unsigned long threshold_bytes)
771 {
772 unsigned long nr_pages = 0;
773 unsigned long start_pfn, end_pfn, mem_size_mb;
774 int nid, i;
775
776 /* count pages that have no node ID assigned */
777 for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) {
778 if (!numa_valid_node(nid))
779 nr_pages += end_pfn - start_pfn;
780 }
781
782 if ((nr_pages << PAGE_SHIFT) > threshold_bytes) {
783 mem_size_mb = memblock_phys_mem_size() >> 20;
784 pr_err("NUMA: no nodes coverage for %luMB of %luMB RAM\n",
785 (nr_pages << PAGE_SHIFT) >> 20, mem_size_mb);
786 return false;
787 }
788
789 return true;
790 }
791
792
793 /**
794 * memblock_isolate_range - isolate given range into disjoint memblocks
795 * @type: memblock type to isolate range for
796 * @base: base of range to isolate
797 * @size: size of range to isolate
798 * @start_rgn: out parameter for the start of isolated region
799 * @end_rgn: out parameter for the end of isolated region
800 *
801 * Walk @type and ensure that regions don't cross the boundaries defined by
802 * [@base, @base + @size). Crossing regions are split at the boundaries,
803 * which may create at most two more regions. The index of the first
804 * region inside the range is returned in *@start_rgn and the index of the
805 * first region after the range is returned in *@end_rgn.
806 *
807 * Return:
808 * 0 on success, -errno on failure.
809 */
810 static int __init_memblock memblock_isolate_range(struct memblock_type *type,
811 phys_addr_t base, phys_addr_t size,
812 int *start_rgn, int *end_rgn)
813 {
814 phys_addr_t end = base + memblock_cap_size(base, &size);
815 int idx;
816 struct memblock_region *rgn;
817
818 *start_rgn = *end_rgn = 0;
819
820 if (!size)
821 return 0;
822
823 /* we'll create at most two more regions */
824 while (type->cnt + 2 > type->max)
825 if (memblock_double_array(type, base, size) < 0)
826 return -ENOMEM;
827
828 for_each_memblock_type(idx, type, rgn) {
829 phys_addr_t rbase = rgn->base;
830 phys_addr_t rend = rbase + rgn->size;
831
832 if (rbase >= end)
833 break;
834 if (rend <= base)
835 continue;
836
837 if (rbase < base) {
838 /*
839 * @rgn intersects from below. Split and continue
840 * to process the next region - the new top half.
841 */
842 rgn->base = base;
843 rgn->size -= base - rbase;
844 type->total_size -= base - rbase;
845 memblock_insert_region(type, idx, rbase, base - rbase,
846 memblock_get_region_node(rgn),
847 rgn->flags);
848 } else if (rend > end) {
849 /*
850 * @rgn intersects from above. Split and redo the
851 * current region - the new bottom half.
852 */
853 rgn->base = end;
854 rgn->size -= end - rbase;
855 type->total_size -= end - rbase;
856 memblock_insert_region(type, idx--, rbase, end - rbase,
857 memblock_get_region_node(rgn),
858 rgn->flags);
859 } else {
860 /* @rgn is fully contained, record it */
861 if (!*end_rgn)
862 *start_rgn = idx;
863 *end_rgn = idx + 1;
864 }
865 }
866
867 return 0;
868 }
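
/*
 * Example of the splitting above (hypothetical values): isolating
 * [0x1800, 0x2800) in a type that holds [0x1000, 0x3000) leaves three
 * regions, [0x1000, 0x1800), [0x1800, 0x2800) and [0x2800, 0x3000), with
 * *@start_rgn and *@end_rgn bracketing only the middle one.
 */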
869
870 static int __init_memblock memblock_remove_range(struct memblock_type *type,
871 phys_addr_t base, phys_addr_t size)
872 {
873 int start_rgn, end_rgn;
874 int i, ret;
875
876 ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn);
877 if (ret)
878 return ret;
879
880 for (i = end_rgn - 1; i >= start_rgn; i--)
881 memblock_remove_region(type, i);
882 if (memblock_memsize_tracking) {
883 if (type == &memblock.reserved) {
884 memblock_dbg("%s: kernel %lu %+ld\n", __func__,
885 memsize_kinit, (unsigned long)size);
886 memsize_kinit -= size;
887 }
888 }
889 return 0;
890 }
891
892 int __init_memblock memblock_remove(phys_addr_t base, phys_addr_t size)
893 {
894 phys_addr_t end = base + size - 1;
895
896 memblock_dbg("%s: [%pa-%pa] %pS\n", __func__,
897 &base, &end, (void *)_RET_IP_);
898
899 return memblock_remove_range(&memblock.memory, base, size);
900 }
901
902 /**
903 * memblock_free - free boot memory allocation
904 * @ptr: starting address of the boot memory allocation
905 * @size: size of the boot memory block in bytes
906 *
907 * Free boot memory block previously allocated by memblock_alloc_xx() API.
908 * The freed memory will not be released to the buddy allocator.
909 */
910 void __init_memblock memblock_free(void *ptr, size_t size)
911 {
912 if (ptr)
913 memblock_phys_free(__pa(ptr), size);
914 }
915
916 /**
917 * memblock_phys_free - free boot memory block
918 * @base: phys starting address of the boot memory block
919 * @size: size of the boot memory block in bytes
920 *
921 * Free boot memory block previously allocated by memblock_phys_alloc_xx() API.
922 * The freed memory will not be released to the buddy allocator.
923 */
924 int __init_memblock memblock_phys_free(phys_addr_t base, phys_addr_t size)
925 {
926 phys_addr_t end = base + size - 1;
927
928 memblock_dbg("%s: [%pa-%pa] %pS\n", __func__,
929 &base, &end, (void *)_RET_IP_);
930
931 kmemleak_free_part_phys(base, size);
932 return memblock_remove_range(&memblock.reserved, base, size);
933 }
934 #ifdef CONFIG_ARCH_KEEP_MEMBLOCK
935 EXPORT_SYMBOL_GPL(memblock_free);
936 #endif
937
938 int __init_memblock memblock_reserve(phys_addr_t base, phys_addr_t size)
939 {
940 phys_addr_t end = base + size - 1;
941
942 memblock_dbg("%s: [%pa-%pa] %pS\n", __func__,
943 &base, &end, (void *)_RET_IP_);
944
945 return memblock_add_range(&memblock.reserved, base, size, MAX_NUMNODES, 0);
946 }
947
948 #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
949 int __init_memblock memblock_physmem_add(phys_addr_t base, phys_addr_t size)
950 {
951 phys_addr_t end = base + size - 1;
952
953 memblock_dbg("%s: [%pa-%pa] %pS\n", __func__,
954 &base, &end, (void *)_RET_IP_);
955
956 return memblock_add_range(&physmem, base, size, MAX_NUMNODES, 0);
957 }
958 #endif
959
960 /**
961 * memblock_setclr_flag - set or clear flag for a memory region
962 * @type: memblock type to set/clear flag for
963 * @base: base address of the region
964 * @size: size of the region
965 * @set: set or clear the flag
966 * @flag: the flag to update
967 *
968 * This function isolates the region [@base, @base + @size) and sets/clears the flag.
969 *
970 * Return: 0 on success, -errno on failure.
971 */
972 static int __init_memblock memblock_setclr_flag(struct memblock_type *type,
973 phys_addr_t base, phys_addr_t size, int set, int flag)
974 {
975 int i, ret, start_rgn, end_rgn;
976
977 ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn);
978 if (ret)
979 return ret;
980
981 for (i = start_rgn; i < end_rgn; i++) {
982 struct memblock_region *r = &type->regions[i];
983
984 if (set)
985 r->flags |= flag;
986 else
987 r->flags &= ~flag;
988 }
989
990 memblock_merge_regions(type, start_rgn, end_rgn);
991 return 0;
992 }
993
994 /**
995 * memblock_mark_hotplug - Mark hotpluggable memory with flag MEMBLOCK_HOTPLUG.
996 * @base: the base phys addr of the region
997 * @size: the size of the region
998 *
999 * Return: 0 on success, -errno on failure.
1000 */
1001 int __init_memblock memblock_mark_hotplug(phys_addr_t base, phys_addr_t size)
1002 {
1003 return memblock_setclr_flag(&memblock.memory, base, size, 1, MEMBLOCK_HOTPLUG);
1004 }
1005
1006 /**
1007 * memblock_clear_hotplug - Clear flag MEMBLOCK_HOTPLUG for a specified region.
1008 * @base: the base phys addr of the region
1009 * @size: the size of the region
1010 *
1011 * Return: 0 on success, -errno on failure.
1012 */
1013 int __init_memblock memblock_clear_hotplug(phys_addr_t base, phys_addr_t size)
1014 {
1015 return memblock_setclr_flag(&memblock.memory, base, size, 0, MEMBLOCK_HOTPLUG);
1016 }
1017
1018 /**
1019 * memblock_mark_mirror - Mark mirrored memory with flag MEMBLOCK_MIRROR.
1020 * @base: the base phys addr of the region
1021 * @size: the size of the region
1022 *
1023 * Return: 0 on success, -errno on failure.
1024 */
1025 int __init_memblock memblock_mark_mirror(phys_addr_t base, phys_addr_t size)
1026 {
1027 if (!mirrored_kernelcore)
1028 return 0;
1029
1030 system_has_some_mirror = true;
1031
1032 return memblock_setclr_flag(&memblock.memory, base, size, 1, MEMBLOCK_MIRROR);
1033 }
1034
1035 /**
1036 * memblock_mark_nomap - Mark a memory region with flag MEMBLOCK_NOMAP.
1037 * @base: the base phys addr of the region
1038 * @size: the size of the region
1039 *
1040 * The memory regions marked with %MEMBLOCK_NOMAP will not be added to the
1041 * direct mapping of the physical memory. These regions will still be
1042 * covered by the memory map. The struct page representing NOMAP memory
1043 * frames in the memory map will be PageReserved()
1044 *
1045 * Note: if the memory being marked %MEMBLOCK_NOMAP was allocated from
1046 * memblock, the caller must inform kmemleak to ignore that memory
1047 *
1048 * Return: 0 on success, -errno on failure.
1049 */
1050 int __init_memblock memblock_mark_nomap(phys_addr_t base, phys_addr_t size)
1051 {
1052 return memblock_setclr_flag(&memblock.memory, base, size, 1, MEMBLOCK_NOMAP);
1053 }
1054
1055 /**
1056 * memblock_clear_nomap - Clear flag MEMBLOCK_NOMAP for a specified region.
1057 * @base: the base phys addr of the region
1058 * @size: the size of the region
1059 *
1060 * Return: 0 on success, -errno on failure.
1061 */
1062 int __init_memblock memblock_clear_nomap(phys_addr_t base, phys_addr_t size)
1063 {
1064 return memblock_setclr_flag(&memblock.memory, base, size, 0, MEMBLOCK_NOMAP);
1065 }
1066
1067 /**
1068 * memblock_reserved_mark_noinit - Mark a reserved memory region with flag
1069 * MEMBLOCK_RSRV_NOINIT which results in the struct pages not being initialized
1070 * for this region.
1071 * @base: the base phys addr of the region
1072 * @size: the size of the region
1073 *
1074 * struct pages will not be initialized for reserved memory regions marked with
1075 * %MEMBLOCK_RSRV_NOINIT.
1076 *
1077 * Return: 0 on success, -errno on failure.
1078 */
1079 int __init_memblock memblock_reserved_mark_noinit(phys_addr_t base, phys_addr_t size)
1080 {
1081 return memblock_setclr_flag(&memblock.reserved, base, size, 1,
1082 MEMBLOCK_RSRV_NOINIT);
1083 }
1084
1085 static bool should_skip_region(struct memblock_type *type,
1086 struct memblock_region *m,
1087 int nid, int flags)
1088 {
1089 int m_nid = memblock_get_region_node(m);
1090
1091 /* we never skip regions when iterating memblock.reserved or physmem */
1092 if (type != memblock_memory)
1093 return false;
1094
1095 /* only memory regions are associated with nodes, check it */
1096 if (numa_valid_node(nid) && nid != m_nid)
1097 return true;
1098
1099 /* skip hotpluggable memory regions if needed */
1100 if (movable_node_is_enabled() && memblock_is_hotpluggable(m) &&
1101 !(flags & MEMBLOCK_HOTPLUG))
1102 return true;
1103
1104 /* if we want mirror memory skip non-mirror memory regions */
1105 if ((flags & MEMBLOCK_MIRROR) && !memblock_is_mirror(m))
1106 return true;
1107
1108 /* skip nomap memory unless we were asked for it explicitly */
1109 if (!(flags & MEMBLOCK_NOMAP) && memblock_is_nomap(m))
1110 return true;
1111
1112 /* skip driver-managed memory unless we were asked for it explicitly */
1113 if (!(flags & MEMBLOCK_DRIVER_MANAGED) && memblock_is_driver_managed(m))
1114 return true;
1115
1116 return false;
1117 }
1118
1119 /**
1120 * __next_mem_range - next function for for_each_free_mem_range() etc.
1121 * @idx: pointer to u64 loop variable
1122 * @nid: node selector, %NUMA_NO_NODE for all nodes
1123 * @flags: pick from blocks based on memory attributes
1124 * @type_a: pointer to memblock_type from where the range is taken
1125 * @type_b: pointer to memblock_type which excludes memory from being taken
1126 * @out_start: ptr to phys_addr_t for start address of the range, can be %NULL
1127 * @out_end: ptr to phys_addr_t for end address of the range, can be %NULL
1128 * @out_nid: ptr to int for nid of the range, can be %NULL
1129 *
1130 * Find the first area from *@idx which matches @nid, fill the out
1131 * parameters, and update *@idx for the next iteration. The lower 32 bits of
1132 * *@idx contain the index into type_a and the upper 32 bits index the
1133 * areas before each region in type_b. For example, if type_b regions
1134 * look like the following,
1135 *
1136 * 0:[0-16), 1:[32-48), 2:[128-130)
1137 *
1138 * The upper 32 bits index the following regions.
1139 *
1140 * 0:[0-0), 1:[16-32), 2:[48-128), 3:[130-MAX)
1141 *
1142 * As both region arrays are sorted, the function advances the two indices
1143 * in lockstep and returns each intersection.
1144 */
1145 void __next_mem_range(u64 *idx, int nid, enum memblock_flags flags,
1146 struct memblock_type *type_a,
1147 struct memblock_type *type_b, phys_addr_t *out_start,
1148 phys_addr_t *out_end, int *out_nid)
1149 {
1150 int idx_a = *idx & 0xffffffff;
1151 int idx_b = *idx >> 32;
1152
1153 for (; idx_a < type_a->cnt; idx_a++) {
1154 struct memblock_region *m = &type_a->regions[idx_a];
1155
1156 phys_addr_t m_start = m->base;
1157 phys_addr_t m_end = m->base + m->size;
1158 int m_nid = memblock_get_region_node(m);
1159
1160 if (should_skip_region(type_a, m, nid, flags))
1161 continue;
1162
1163 if (!type_b) {
1164 if (out_start)
1165 *out_start = m_start;
1166 if (out_end)
1167 *out_end = m_end;
1168 if (out_nid)
1169 *out_nid = m_nid;
1170 idx_a++;
1171 *idx = (u32)idx_a | (u64)idx_b << 32;
1172 return;
1173 }
1174
1175 /* scan areas before each reservation */
1176 for (; idx_b < type_b->cnt + 1; idx_b++) {
1177 struct memblock_region *r;
1178 phys_addr_t r_start;
1179 phys_addr_t r_end;
1180
1181 r = &type_b->regions[idx_b];
1182 r_start = idx_b ? r[-1].base + r[-1].size : 0;
1183 r_end = idx_b < type_b->cnt ?
1184 r->base : PHYS_ADDR_MAX;
1185
1186 /*
1187 * if idx_b advanced past idx_a,
1188 * break out to advance idx_a
1189 */
1190 if (r_start >= m_end)
1191 break;
1192 /* if the two regions intersect, we're done */
1193 if (m_start < r_end) {
1194 if (out_start)
1195 *out_start =
1196 max(m_start, r_start);
1197 if (out_end)
1198 *out_end = min(m_end, r_end);
1199 if (out_nid)
1200 *out_nid = m_nid;
1201 /*
1202 * The region which ends first is
1203 * advanced for the next iteration.
1204 */
1205 if (m_end <= r_end)
1206 idx_a++;
1207 else
1208 idx_b++;
1209 *idx = (u32)idx_a | (u64)idx_b << 32;
1210 return;
1211 }
1212 }
1213 }
1214
1215 /* signal end of iteration */
1216 *idx = ULLONG_MAX;
1217 }
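
/*
 * Sketch of the index encoding used above (values are hypothetical):
 *
 *	u64 idx = 0;				// start of the walk
 *	int idx_a = idx & 0xffffffff;		// position in type_a
 *	int idx_b = idx >> 32;			// gap before type_b->regions[idx_b]
 *	idx = (u32)idx_a | (u64)idx_b << 32;	// repacked after every step
 *
 * for_each_free_mem_range() starts from idx == 0 and stops when the
 * iterator stores ULLONG_MAX back into it.
 */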
1218
1219 /**
1220 * __next_mem_range_rev - generic next function for for_each_*_range_rev()
1221 *
1222 * @idx: pointer to u64 loop variable
1223 * @nid: node selector, %NUMA_NO_NODE for all nodes
1224 * @flags: pick from blocks based on memory attributes
1225 * @type_a: pointer to memblock_type from where the range is taken
1226 * @type_b: pointer to memblock_type which excludes memory from being taken
1227 * @out_start: ptr to phys_addr_t for start address of the range, can be %NULL
1228 * @out_end: ptr to phys_addr_t for end address of the range, can be %NULL
1229 * @out_nid: ptr to int for nid of the range, can be %NULL
1230 *
1231 * Finds the next range from type_a which is not marked as unsuitable
1232 * in type_b.
1233 *
1234 * Reverse of __next_mem_range().
1235 */
1236 void __init_memblock __next_mem_range_rev(u64 *idx, int nid,
1237 enum memblock_flags flags,
1238 struct memblock_type *type_a,
1239 struct memblock_type *type_b,
1240 phys_addr_t *out_start,
1241 phys_addr_t *out_end, int *out_nid)
1242 {
1243 int idx_a = *idx & 0xffffffff;
1244 int idx_b = *idx >> 32;
1245
1246 if (*idx == (u64)ULLONG_MAX) {
1247 idx_a = type_a->cnt - 1;
1248 if (type_b != NULL)
1249 idx_b = type_b->cnt;
1250 else
1251 idx_b = 0;
1252 }
1253
1254 for (; idx_a >= 0; idx_a--) {
1255 struct memblock_region *m = &type_a->regions[idx_a];
1256
1257 phys_addr_t m_start = m->base;
1258 phys_addr_t m_end = m->base + m->size;
1259 int m_nid = memblock_get_region_node(m);
1260
1261 if (should_skip_region(type_a, m, nid, flags))
1262 continue;
1263
1264 if (!type_b) {
1265 if (out_start)
1266 *out_start = m_start;
1267 if (out_end)
1268 *out_end = m_end;
1269 if (out_nid)
1270 *out_nid = m_nid;
1271 idx_a--;
1272 *idx = (u32)idx_a | (u64)idx_b << 32;
1273 return;
1274 }
1275
1276 /* scan areas before each reservation */
1277 for (; idx_b >= 0; idx_b--) {
1278 struct memblock_region *r;
1279 phys_addr_t r_start;
1280 phys_addr_t r_end;
1281
1282 r = &type_b->regions[idx_b];
1283 r_start = idx_b ? r[-1].base + r[-1].size : 0;
1284 r_end = idx_b < type_b->cnt ?
1285 r->base : PHYS_ADDR_MAX;
1286 /*
1287 * if idx_b advanced past idx_a,
1288 * break out to advance idx_a
1289 */
1290
1291 if (r_end <= m_start)
1292 break;
1293 /* if the two regions intersect, we're done */
1294 if (m_end > r_start) {
1295 if (out_start)
1296 *out_start = max(m_start, r_start);
1297 if (out_end)
1298 *out_end = min(m_end, r_end);
1299 if (out_nid)
1300 *out_nid = m_nid;
1301 if (m_start >= r_start)
1302 idx_a--;
1303 else
1304 idx_b--;
1305 *idx = (u32)idx_a | (u64)idx_b << 32;
1306 return;
1307 }
1308 }
1309 }
1310 /* signal end of iteration */
1311 *idx = ULLONG_MAX;
1312 }
1313
1314 /*
1315 * Common iterator interface used to define for_each_mem_pfn_range().
1316 */
1317 void __init_memblock __next_mem_pfn_range(int *idx, int nid,
1318 unsigned long *out_start_pfn,
1319 unsigned long *out_end_pfn, int *out_nid)
1320 {
1321 struct memblock_type *type = &memblock.memory;
1322 struct memblock_region *r;
1323 int r_nid;
1324
1325 while (++*idx < type->cnt) {
1326 r = &type->regions[*idx];
1327 r_nid = memblock_get_region_node(r);
1328
1329 if (PFN_UP(r->base) >= PFN_DOWN(r->base + r->size))
1330 continue;
1331 if (!numa_valid_node(nid) || nid == r_nid)
1332 break;
1333 }
1334 if (*idx >= type->cnt) {
1335 *idx = -1;
1336 return;
1337 }
1338
1339 if (out_start_pfn)
1340 *out_start_pfn = PFN_UP(r->base);
1341 if (out_end_pfn)
1342 *out_end_pfn = PFN_DOWN(r->base + r->size);
1343 if (out_nid)
1344 *out_nid = r_nid;
1345 }
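
/*
 * Worked example for the PFN clamping above (4 KiB pages, made-up range):
 * a region [0x1200, 0x5000) yields PFN_UP(0x1200) == 2 and
 * PFN_DOWN(0x5000) == 5, i.e. pfns 2..4; a region smaller than a page,
 * such as [0x1200, 0x1300), is skipped because PFN_UP(base) is not below
 * PFN_DOWN(base + size).
 */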
1346
1347 /**
1348 * memblock_set_node - set node ID on memblock regions
1349 * @base: base of area to set node ID for
1350 * @size: size of area to set node ID for
1351 * @type: memblock type to set node ID for
1352 * @nid: node ID to set
1353 *
1354 * Set the nid of memblock @type regions in [@base, @base + @size) to @nid.
1355 * Regions which cross the area boundaries are split as necessary.
1356 *
1357 * Return:
1358 * 0 on success, -errno on failure.
1359 */
1360 int __init_memblock memblock_set_node(phys_addr_t base, phys_addr_t size,
1361 struct memblock_type *type, int nid)
1362 {
1363 #ifdef CONFIG_NUMA
1364 int start_rgn, end_rgn;
1365 int i, ret;
1366
1367 ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn);
1368 if (ret)
1369 return ret;
1370
1371 for (i = start_rgn; i < end_rgn; i++)
1372 memblock_set_region_node(&type->regions[i], nid);
1373
1374 memblock_merge_regions(type, start_rgn, end_rgn);
1375 #endif
1376 return 0;
1377 }
1378
1379 #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
1380 /**
1381 * __next_mem_pfn_range_in_zone - iterator for for_each_*_range_in_zone()
1382 *
1383 * @idx: pointer to u64 loop variable
1384 * @zone: zone in which all of the memory blocks reside
1385 * @out_spfn: ptr to ulong for start pfn of the range, can be %NULL
1386 * @out_epfn: ptr to ulong for end pfn of the range, can be %NULL
1387 *
1388 * This function is meant to be a zone/pfn specific wrapper for the
1389 * for_each_mem_range type iterators. Specifically they are used in the
1390 * deferred memory init routines and as such we were duplicating much of
1391 * this logic throughout the code. So instead of having it in multiple
1392 * locations it seemed like it would make more sense to centralize this to
1393 * one new iterator that does everything they need.
1394 */
1395 void __init_memblock
1396 __next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone,
1397 unsigned long *out_spfn, unsigned long *out_epfn)
1398 {
1399 int zone_nid = zone_to_nid(zone);
1400 phys_addr_t spa, epa;
1401
1402 __next_mem_range(idx, zone_nid, MEMBLOCK_NONE,
1403 &memblock.memory, &memblock.reserved,
1404 &spa, &epa, NULL);
1405
1406 while (*idx != U64_MAX) {
1407 unsigned long epfn = PFN_DOWN(epa);
1408 unsigned long spfn = PFN_UP(spa);
1409
1410 /*
1411 * Verify the end is at least past the start of the zone and
1412 * that we have at least one PFN to initialize.
1413 */
1414 if (zone->zone_start_pfn < epfn && spfn < epfn) {
1415 /* if we went too far just stop searching */
1416 if (zone_end_pfn(zone) <= spfn) {
1417 *idx = U64_MAX;
1418 break;
1419 }
1420
1421 if (out_spfn)
1422 *out_spfn = max(zone->zone_start_pfn, spfn);
1423 if (out_epfn)
1424 *out_epfn = min(zone_end_pfn(zone), epfn);
1425
1426 return;
1427 }
1428
1429 __next_mem_range(idx, zone_nid, MEMBLOCK_NONE,
1430 &memblock.memory, &memblock.reserved,
1431 &spa, &epa, NULL);
1432 }
1433
1434 /* signal end of iteration */
1435 if (out_spfn)
1436 *out_spfn = ULONG_MAX;
1437 if (out_epfn)
1438 *out_epfn = 0;
1439 }
1440
1441 #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
1442
1443 /**
1444 * memblock_alloc_range_nid - allocate boot memory block
1445 * @size: size of memory block to be allocated in bytes
1446 * @align: alignment of the region and block's size
1447 * @start: the lower bound of the memory region to allocate (phys address)
1448 * @end: the upper bound of the memory region to allocate (phys address)
1449 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
1450 * @exact_nid: control the allocation fall back to other nodes
1451 *
1452 * The allocation is performed from memory region limited by
1453 * memblock.current_limit if @end == %MEMBLOCK_ALLOC_ACCESSIBLE.
1454 *
1455 * If the specified node can not hold the requested memory and @exact_nid
1456 * is false, the allocation falls back to any node in the system.
1457 *
1458 * For systems with memory mirroring, the allocation is attempted first
1459 * from the regions with mirroring enabled and then retried from any
1460 * memory region.
1461 *
1462 * In addition, the function registers the allocated boot memory block with
1463 * kmemleak_alloc_phys(), so it is never reported as a leak.
1464 *
1465 * Return:
1466 * Physical address of allocated memory block on success, %0 on failure.
1467 */
1468 phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size,
1469 phys_addr_t align, phys_addr_t start,
1470 phys_addr_t end, int nid,
1471 bool exact_nid)
1472 {
1473 enum memblock_flags flags = choose_memblock_flags();
1474 phys_addr_t found;
1475
1476 /*
1477 * Detect any accidental use of these APIs after slab is ready, as at
1478 * this moment memblock may be deinitialized already and its
1479 * internal data may be destroyed (after execution of memblock_free_all)
1480 */
1481 if (WARN_ON_ONCE(slab_is_available())) {
1482 void *vaddr = kzalloc_node(size, GFP_NOWAIT, nid);
1483
1484 return vaddr ? virt_to_phys(vaddr) : 0;
1485 }
1486
1487 if (!align) {
1488 /* Can't use WARNs this early in boot on powerpc */
1489 dump_stack();
1490 align = SMP_CACHE_BYTES;
1491 }
1492
1493 again:
1494 found = memblock_find_in_range_node(size, align, start, end, nid,
1495 flags);
1496 if (found && !memblock_reserve(found, size))
1497 goto done;
1498
1499 if (numa_valid_node(nid) && !exact_nid) {
1500 found = memblock_find_in_range_node(size, align, start,
1501 end, NUMA_NO_NODE,
1502 flags);
1503 if (found && !memblock_reserve(found, size))
1504 goto done;
1505 }
1506
1507 if (flags & MEMBLOCK_MIRROR) {
1508 flags &= ~MEMBLOCK_MIRROR;
1509 pr_warn_ratelimited("Could not allocate %pap bytes of mirrored memory\n",
1510 &size);
1511 goto again;
1512 }
1513
1514 return 0;
1515
1516 done:
1517 /*
1518 * Skip kmemleak for those places like kasan_init() and
1519 * early_pgtable_alloc() due to high volume.
1520 */
1521 if (end != MEMBLOCK_ALLOC_NOLEAKTRACE)
1522 /*
1523 * Memblock allocated blocks are never reported as
1524 * leaks. This is because many of these blocks are
1525 * only referred via the physical address which is
1526 * not looked up by kmemleak.
1527 */
1528 kmemleak_alloc_phys(found, size, 0);
1529
1530 /*
1531 * Some Virtual Machine platforms, such as Intel TDX or AMD SEV-SNP,
1532 * require memory to be accepted before it can be used by the
1533 * guest.
1534 *
1535 * Accept the memory of the allocated buffer.
1536 */
1537 accept_memory(found, size);
1538
1539 return found;
1540 }
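
/*
 * Summary sketch of the fallback order implemented above: try the
 * requested @nid first, then (unless @exact_nid) any node, then drop the
 * MEMBLOCK_MIRROR requirement and start over, and finally return 0 if
 * nothing could be reserved.
 */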
1541
1542 /**
1543 * memblock_phys_alloc_range - allocate a memory block inside specified range
1544 * @size: size of memory block to be allocated in bytes
1545 * @align: alignment of the region and block's size
1546 * @start: the lower bound of the memory region to allocate (physical address)
1547 * @end: the upper bound of the memory region to allocate (physical address)
1548 *
1549 * Allocate @size bytes in the between @start and @end.
1550 *
1551 * Return: physical address of the allocated memory block on success,
1552 * %0 on failure.
1553 */
1554 phys_addr_t __init memblock_phys_alloc_range(phys_addr_t size,
1555 phys_addr_t align,
1556 phys_addr_t start,
1557 phys_addr_t end)
1558 {
1559 memblock_dbg("%s: %llu bytes align=0x%llx from=%pa max_addr=%pa %pS\n",
1560 __func__, (u64)size, (u64)align, &start, &end,
1561 (void *)_RET_IP_);
1562 return memblock_alloc_range_nid(size, align, start, end, NUMA_NO_NODE,
1563 false);
1564 }
1565
1566 /**
1567 * memblock_phys_alloc_try_nid - allocate a memory block from specified NUMA node
1568 * @size: size of memory block to be allocated in bytes
1569 * @align: alignment of the region and block's size
1570 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
1571 *
1572 * Allocates memory block from the specified NUMA node. If the node
1573 * has no available memory, it attempts to allocate from any node in the
1574 * system.
1575 *
1576 * Return: physical address of the allocated memory block on success,
1577 * %0 on failure.
1578 */
1579 phys_addr_t __init memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid)
1580 {
1581 return memblock_alloc_range_nid(size, align, 0,
1582 MEMBLOCK_ALLOC_ACCESSIBLE, nid, false);
1583 }
1584
1585 /**
1586 * memblock_alloc_internal - allocate boot memory block
1587 * @size: size of memory block to be allocated in bytes
1588 * @align: alignment of the region and block's size
1589 * @min_addr: the lower bound of the memory region to allocate (phys address)
1590 * @max_addr: the upper bound of the memory region to allocate (phys address)
1591 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
1592 * @exact_nid: control the allocation fall back to other nodes
1593 *
1594 * Allocates memory block using memblock_alloc_range_nid() and
1595 * converts the returned physical address to virtual.
1596 *
1597 * The @min_addr limit is dropped if it cannot be satisfied and the allocation
1598 * will fall back to memory below @min_addr. Other constraints, such
1599 * as node and mirrored memory will be handled again in
1600 * memblock_alloc_range_nid().
1601 *
1602 * Return:
1603 * Virtual address of allocated memory block on success, NULL on failure.
1604 */
1605 static void * __init memblock_alloc_internal(
1606 phys_addr_t size, phys_addr_t align,
1607 phys_addr_t min_addr, phys_addr_t max_addr,
1608 int nid, bool exact_nid)
1609 {
1610 phys_addr_t alloc;
1611
1612
1613 if (max_addr > memblock.current_limit)
1614 max_addr = memblock.current_limit;
1615
1616 alloc = memblock_alloc_range_nid(size, align, min_addr, max_addr, nid,
1617 exact_nid);
1618
1619 /* retry allocation without lower limit */
1620 if (!alloc && min_addr)
1621 alloc = memblock_alloc_range_nid(size, align, 0, max_addr, nid,
1622 exact_nid);
1623
1624 if (!alloc)
1625 return NULL;
1626
1627 return phys_to_virt(alloc);
1628 }
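
/*
 * Illustrative case for the retry above: a request with, say,
 * min_addr == 4 GiB on a machine whose usable memory all sits below that
 * boundary fails the first attempt and is then retried with min_addr == 0,
 * so the allocation still succeeds from lower memory.
 */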
1629
1630 /**
1631 * memblock_alloc_exact_nid_raw - allocate boot memory block on the exact node
1632 * without zeroing memory
1633 * @size: size of memory block to be allocated in bytes
1634 * @align: alignment of the region and block's size
1635 * @min_addr: the lower bound of the memory region from where the allocation
1636 * is preferred (phys address)
1637 * @max_addr: the upper bound of the memory region from where the allocation
1638 * is preferred (phys address), or %MEMBLOCK_ALLOC_ACCESSIBLE to
1639 * allocate only from memory limited by memblock.current_limit value
1640 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
1641 *
1642 * Public function, provides additional debug information (including caller
1643 * info), if enabled. Does not zero allocated memory.
1644 *
1645 * Return:
1646 * Virtual address of allocated memory block on success, NULL on failure.
1647 */
1648 void * __init memblock_alloc_exact_nid_raw(
1649 phys_addr_t size, phys_addr_t align,
1650 phys_addr_t min_addr, phys_addr_t max_addr,
1651 int nid)
1652 {
1653 memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=%pa max_addr=%pa %pS\n",
1654 __func__, (u64)size, (u64)align, nid, &min_addr,
1655 &max_addr, (void *)_RET_IP_);
1656
1657 return memblock_alloc_internal(size, align, min_addr, max_addr, nid,
1658 true);
1659 }
1660
1661 /**
1662 * memblock_alloc_try_nid_raw - allocate boot memory block without zeroing
1663 * memory and without panicking
1664 * @size: size of memory block to be allocated in bytes
1665 * @align: alignment of the region and block's size
1666 * @min_addr: the lower bound of the memory region from where the allocation
1667 * is preferred (phys address)
1668 * @max_addr: the upper bound of the memory region from where the allocation
1669 * is preferred (phys address), or %MEMBLOCK_ALLOC_ACCESSIBLE to
1670 * allocate only from memory limited by memblock.current_limit value
1671 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
1672 *
1673 * Public function, provides additional debug information (including caller
1674 * info), if enabled. Does not zero allocated memory, does not panic if request
1675 * cannot be satisfied.
1676 *
1677 * Return:
1678 * Virtual address of allocated memory block on success, NULL on failure.
1679 */
1680 void * __init memblock_alloc_try_nid_raw(
1681 phys_addr_t size, phys_addr_t align,
1682 phys_addr_t min_addr, phys_addr_t max_addr,
1683 int nid)
1684 {
1685 memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=%pa max_addr=%pa %pS\n",
1686 __func__, (u64)size, (u64)align, nid, &min_addr,
1687 &max_addr, (void *)_RET_IP_);
1688
1689 return memblock_alloc_internal(size, align, min_addr, max_addr, nid,
1690 false);
1691 }
1692
1693 /**
1694 * memblock_alloc_try_nid - allocate boot memory block
1695 * @size: size of memory block to be allocated in bytes
1696 * @align: alignment of the region and block's size
1697 * @min_addr: the lower bound of the memory region from where the allocation
1698 * is preferred (phys address)
1699 * @max_addr: the upper bound of the memory region from where the allocation
1700 * is preferred (phys address), or %MEMBLOCK_ALLOC_ACCESSIBLE to
1701 * allocate only from memory limited by memblock.current_limit value
1702 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
1703 *
1704 * Public function, provides additional debug information (including caller
1705 * info), if enabled. This function zeroes the allocated memory.
1706 *
1707 * Return:
1708 * Virtual address of allocated memory block on success, NULL on failure.
1709 */
1710 void * __init memblock_alloc_try_nid(
1711 phys_addr_t size, phys_addr_t align,
1712 phys_addr_t min_addr, phys_addr_t max_addr,
1713 int nid)
1714 {
1715 void *ptr;
1716
1717 memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=%pa max_addr=%pa %pS\n",
1718 __func__, (u64)size, (u64)align, nid, &min_addr,
1719 &max_addr, (void *)_RET_IP_);
1720 ptr = memblock_alloc_internal(size, align,
1721 min_addr, max_addr, nid, false);
1722 if (ptr)
1723 memset(ptr, 0, size);
1724
1725 return ptr;
1726 }
1727
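/*
 * Illustrative usage sketch (not part of this file): the zeroing and the
 * "_raw" variants differ only in whether the caller receives cleared memory.
 * The function name and sizes below are hypothetical.
 *
 *	static void __init example_alloc_tables(int nid)
 *	{
 *		void *zeroed, *raw;
 *
 *		// returned memory is already cleared
 *		zeroed = memblock_alloc_try_nid(SZ_64K, SMP_CACHE_BYTES, 0,
 *						MEMBLOCK_ALLOC_ACCESSIBLE, nid);
 *		// caller is expected to initialize every byte itself
 *		raw = memblock_alloc_try_nid_raw(SZ_64K, SMP_CACHE_BYTES, 0,
 *						 MEMBLOCK_ALLOC_ACCESSIBLE, nid);
 *		if (!zeroed || !raw)
 *			panic("example: cannot allocate boot tables\n");
 *	}
 */
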
1728 /**
1729 * memblock_free_late - free pages directly to buddy allocator
1730 * @base: phys starting address of the boot memory block
1731 * @size: size of the boot memory block in bytes
1732 *
1733 * This is only useful when the memblock allocator has already been torn
1734 * down, but we are still initializing the system. Pages are released directly
1735 * to the buddy allocator.
1736 */
1737 void __init memblock_free_late(phys_addr_t base, phys_addr_t size)
1738 {
1739 phys_addr_t cursor, end;
1740
1741 end = base + size - 1;
1742 memblock_dbg("%s: [%pa-%pa] %pS\n",
1743 __func__, &base, &end, (void *)_RET_IP_);
1744 kmemleak_free_part_phys(base, size);
1745 cursor = PFN_UP(base);
1746 end = PFN_DOWN(base + size);
1747
1748 memblock_memsize_mod_kernel_size(-1 * ((long)(end - cursor) << PAGE_SHIFT));
1749
1750 for (; cursor < end; cursor++) {
1751 memblock_free_pages(pfn_to_page(cursor), cursor, 0);
1752 totalram_pages_inc();
1753 }
1754 }
1755
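/*
 * Illustrative usage sketch (not part of this file): late in boot, a region
 * that was reserved early, e.g. a hypothetical firmware log buffer, can be
 * handed to the buddy allocator page by page; totalram_pages is updated
 * accordingly.
 *
 *	memblock_free_late(example_fw_log_base, example_fw_log_size);
 */
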
1756 /*
1757 * Remaining API functions
1758 */
1759
1760 phys_addr_t __init_memblock memblock_phys_mem_size(void)
1761 {
1762 return memblock.memory.total_size;
1763 }
1764
1765 phys_addr_t __init_memblock memblock_reserved_size(void)
1766 {
1767 return memblock.reserved.total_size;
1768 }
1769
1770 /**
1771 * memblock_estimated_nr_free_pages - return estimated number of free pages
1772 * from memblock point of view
1773 *
1774 * During bootup, subsystems might need a rough estimate of the number of free
1775 * pages in the whole system, before precise numbers are available from the
1776 * buddy. Especially with CONFIG_DEFERRED_STRUCT_PAGE_INIT, the numbers
1777 * obtained from the buddy might be very imprecise during bootup.
1778 *
1779 * Return:
1780 * An estimated number of free pages from memblock point of view.
1781 */
1782 unsigned long __init memblock_estimated_nr_free_pages(void)
1783 {
1784 return PHYS_PFN(memblock_phys_mem_size() - memblock_reserved_size());
1785 }
1786
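/*
 * Illustrative usage sketch (not part of this file): a hypothetical early
 * subsystem sizing a hash table from this rough estimate, before the buddy
 * allocator can report reliable numbers:
 *
 *	static unsigned long __init example_hash_entries(void)
 *	{
 *		// one entry per 256 estimated free pages, ratio made up
 *		return memblock_estimated_nr_free_pages() / 256;
 *	}
 */
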
1787 /* lowest address */
1788 phys_addr_t __init_memblock memblock_start_of_DRAM(void)
1789 {
1790 return memblock.memory.regions[0].base;
1791 }
1792
1793 phys_addr_t __init_memblock memblock_end_of_DRAM(void)
1794 {
1795 int idx = memblock.memory.cnt - 1;
1796
1797 return (memblock.memory.regions[idx].base + memblock.memory.regions[idx].size);
1798 }
1799 EXPORT_SYMBOL_GPL(memblock_end_of_DRAM);
1800
1801 static phys_addr_t __init_memblock __find_max_addr(phys_addr_t limit)
1802 {
1803 phys_addr_t max_addr = PHYS_ADDR_MAX;
1804 struct memblock_region *r;
1805
1806 /*
1807 * Translate the memory @limit size into the max address within one of
1808 * the memory memblock regions. If @limit exceeds the total size of
1809 * those regions, max_addr keeps its original value, PHYS_ADDR_MAX.
1810 */
1811 for_each_mem_region(r) {
1812 if (limit <= r->size) {
1813 max_addr = r->base + limit;
1814 break;
1815 }
1816 limit -= r->size;
1817 }
1818
1819 return max_addr;
1820 }
1821
1822 void __init memblock_enforce_memory_limit(phys_addr_t limit)
1823 {
1824 phys_addr_t max_addr;
1825
1826 if (!limit)
1827 return;
1828
1829 max_addr = __find_max_addr(limit);
1830
1831 /* @limit exceeds the total size of the memory, do nothing */
1832 if (max_addr == PHYS_ADDR_MAX)
1833 return;
1834
1835 /* truncate both memory and reserved regions */
1836 memblock_remove_range(&memblock.memory, max_addr,
1837 PHYS_ADDR_MAX);
1838 memblock_remove_range(&memblock.reserved, max_addr,
1839 PHYS_ADDR_MAX);
1840 }
1841
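/*
 * Illustrative usage sketch (not part of this file): architecture setup code
 * typically parses "mem=" into a byte count and applies it roughly like this
 * (variable names are made up):
 *
 *	static phys_addr_t example_memory_limit __initdata;
 *
 *	static void __init example_apply_mem_limit(void)
 *	{
 *		if (example_memory_limit)
 *			memblock_enforce_memory_limit(example_memory_limit);
 *	}
 */
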
1842 void __init memblock_cap_memory_range(phys_addr_t base, phys_addr_t size)
1843 {
1844 int start_rgn, end_rgn;
1845 int i, ret;
1846
1847 if (!size)
1848 return;
1849
1850 if (!memblock_memory->total_size) {
1851 pr_warn("%s: No memory registered yet\n", __func__);
1852 return;
1853 }
1854
1855 ret = memblock_isolate_range(&memblock.memory, base, size,
1856 &start_rgn, &end_rgn);
1857 if (ret)
1858 return;
1859
1860 /* remove all the MAP regions */
1861 for (i = memblock.memory.cnt - 1; i >= end_rgn; i--)
1862 if (!memblock_is_nomap(&memblock.memory.regions[i]))
1863 memblock_remove_region(&memblock.memory, i);
1864
1865 for (i = start_rgn - 1; i >= 0; i--)
1866 if (!memblock_is_nomap(&memblock.memory.regions[i]))
1867 memblock_remove_region(&memblock.memory, i);
1868
1869 /* truncate the reserved regions */
1870 memblock_remove_range(&memblock.reserved, 0, base);
1871 memblock_remove_range(&memblock.reserved,
1872 base + size, PHYS_ADDR_MAX);
1873 }
1874
1875 void __init memblock_mem_limit_remove_map(phys_addr_t limit)
1876 {
1877 phys_addr_t max_addr;
1878
1879 if (!limit)
1880 return;
1881
1882 max_addr = __find_max_addr(limit);
1883
1884 /* @limit exceeds the total size of the memory, do nothing */
1885 if (max_addr == PHYS_ADDR_MAX)
1886 return;
1887
1888 memblock_cap_memory_range(0, max_addr);
1889 }
1890
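/*
 * Illustrative note (not part of this file): memblock_enforce_memory_limit()
 * keeps the first @limit bytes of memory counted from the lowest region,
 * while memblock_cap_memory_range() keeps exactly [@base, @base + @size) and
 * leaves NOMAP regions outside that window alone. A hypothetical caller that
 * must keep only a crash-kernel window could do:
 *
 *	memblock_cap_memory_range(example_crash_base, example_crash_size);
 */
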
1891 static int __init_memblock memblock_search(struct memblock_type *type, phys_addr_t addr)
1892 {
1893 unsigned int left = 0, right = type->cnt;
1894
1895 do {
1896 unsigned int mid = (right + left) / 2;
1897
1898 if (addr < type->regions[mid].base)
1899 right = mid;
1900 else if (addr >= (type->regions[mid].base +
1901 type->regions[mid].size))
1902 left = mid + 1;
1903 else
1904 return mid;
1905 } while (left < right);
1906 return -1;
1907 }
1908
1909 bool __init_memblock memblock_is_reserved(phys_addr_t addr)
1910 {
1911 return memblock_search(&memblock.reserved, addr) != -1;
1912 }
1913
1914 bool __init_memblock memblock_is_memory(phys_addr_t addr)
1915 {
1916 return memblock_search(&memblock.memory, addr) != -1;
1917 }
1918
1919 bool __init_memblock memblock_is_map_memory(phys_addr_t addr)
1920 {
1921 int i = memblock_search(&memblock.memory, addr);
1922
1923 if (i == -1)
1924 return false;
1925 return !memblock_is_nomap(&memblock.memory.regions[i]);
1926 }
1927
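/*
 * Illustrative usage sketch (not part of this file): validating an address
 * taken from a firmware table before touching it (function name is
 * hypothetical):
 *
 *	static bool __init example_addr_is_usable(phys_addr_t addr)
 *	{
 *		// mapped RAM that memblock has not already handed out
 *		return memblock_is_map_memory(addr) &&
 *		       !memblock_is_reserved(addr);
 *	}
 */
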
1928 int __init_memblock memblock_search_pfn_nid(unsigned long pfn,
1929 unsigned long *start_pfn, unsigned long *end_pfn)
1930 {
1931 struct memblock_type *type = &memblock.memory;
1932 int mid = memblock_search(type, PFN_PHYS(pfn));
1933
1934 if (mid == -1)
1935 return NUMA_NO_NODE;
1936
1937 *start_pfn = PFN_DOWN(type->regions[mid].base);
1938 *end_pfn = PFN_DOWN(type->regions[mid].base + type->regions[mid].size);
1939
1940 return memblock_get_region_node(&type->regions[mid]);
1941 }
1942
1943 /**
1944 * memblock_is_region_memory - check if a region is a subset of memory
1945 * @base: base of region to check
1946 * @size: size of region to check
1947 *
1948 * Check if the region [@base, @base + @size) is a subset of a memory block.
1949 *
1950 * Return:
1951 * %true if the region is a subset of a memory block, %false otherwise.
1952 */
1953 bool __init_memblock memblock_is_region_memory(phys_addr_t base, phys_addr_t size)
1954 {
1955 int idx = memblock_search(&memblock.memory, base);
1956 phys_addr_t end = base + memblock_cap_size(base, &size);
1957
1958 if (idx == -1)
1959 return false;
1960 return (memblock.memory.regions[idx].base +
1961 memblock.memory.regions[idx].size) >= end;
1962 }
1963
1964 /**
1965 * memblock_is_region_reserved - check if a region intersects reserved memory
1966 * @base: base of region to check
1967 * @size: size of region to check
1968 *
1969 * Check if the region [@base, @base + @size) intersects a reserved
1970 * memory block.
1971 *
1972 * Return:
1973 * True if they intersect, false if not.
1974 */
1975 bool __init_memblock memblock_is_region_reserved(phys_addr_t base, phys_addr_t size)
1976 {
1977 return memblock_overlaps_region(&memblock.reserved, base, size);
1978 }
1979
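/*
 * Illustrative usage sketch (not part of this file): checking a candidate
 * range before reserving it for a hypothetical firmware blob:
 *
 *	static bool __init example_range_is_free(phys_addr_t base,
 *						 phys_addr_t size)
 *	{
 *		return memblock_is_region_memory(base, size) &&
 *		       !memblock_is_region_reserved(base, size);
 *	}
 */
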
1980 void __init_memblock memblock_trim_memory(phys_addr_t align)
1981 {
1982 phys_addr_t start, end, orig_start, orig_end;
1983 struct memblock_region *r;
1984
1985 for_each_mem_region(r) {
1986 orig_start = r->base;
1987 orig_end = r->base + r->size;
1988 start = round_up(orig_start, align);
1989 end = round_down(orig_end, align);
1990
1991 if (start == orig_start && end == orig_end)
1992 continue;
1993
1994 if (start < end) {
1995 r->base = start;
1996 r->size = end - start;
1997 } else {
1998 memblock_remove_region(&memblock.memory,
1999 r - memblock.memory.regions);
2000 r--;
2001 }
2002 }
2003 }
2004
2005 void __init_memblock memblock_set_current_limit(phys_addr_t limit)
2006 {
2007 memblock.current_limit = limit;
2008 }
2009
2010 phys_addr_t __init_memblock memblock_get_current_limit(void)
2011 {
2012 return memblock.current_limit;
2013 }
2014
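/*
 * Illustrative usage sketch (not part of this file): before the full linear
 * map exists, architecture code usually restricts memblock_alloc*() to the
 * memory it can already address, e.g. (symbol name hypothetical):
 *
 *	memblock_set_current_limit(example_end_of_initial_mapping);
 */
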
2015 static void __init_memblock memblock_dump(struct memblock_type *type)
2016 {
2017 phys_addr_t base, end, size;
2018 enum memblock_flags flags;
2019 int idx;
2020 struct memblock_region *rgn;
2021
2022 pr_info(" %s.cnt = 0x%lx\n", type->name, type->cnt);
2023
2024 for_each_memblock_type(idx, type, rgn) {
2025 char nid_buf[32] = "";
2026
2027 base = rgn->base;
2028 size = rgn->size;
2029 end = base + size - 1;
2030 flags = rgn->flags;
2031 #ifdef CONFIG_NUMA
2032 if (numa_valid_node(memblock_get_region_node(rgn)))
2033 snprintf(nid_buf, sizeof(nid_buf), " on node %d",
2034 memblock_get_region_node(rgn));
2035 #endif
2036 pr_info(" %s[%#x]\t[%pa-%pa], %pa bytes%s flags: %#x\n",
2037 type->name, idx, &base, &end, &size, nid_buf, flags);
2038 }
2039 }
2040
2041 static void __init_memblock __memblock_dump_all(void)
2042 {
2043 pr_info("MEMBLOCK configuration:\n");
2044 pr_info(" memory size = %pa reserved size = %pa\n",
2045 &memblock.memory.total_size,
2046 &memblock.reserved.total_size);
2047
2048 memblock_dump(&memblock.memory);
2049 memblock_dump(&memblock.reserved);
2050 #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
2051 memblock_dump(&physmem);
2052 #endif
2053 }
2054
2055 void __init_memblock memblock_dump_all(void)
2056 {
2057 if (memblock_debug)
2058 __memblock_dump_all();
2059 }
2060
2061 void __init memblock_allow_resize(void)
2062 {
2063 memblock_can_resize = 1;
2064 }
2065
2066 static int __init early_memblock(char *p)
2067 {
2068 if (p && strstr(p, "debug"))
2069 memblock_debug = 1;
2070 return 0;
2071 }
2072 early_param("memblock", early_memblock);
2073
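/*
 * Example: booting with "memblock=debug" on the kernel command line sets
 * memblock_debug, which makes memblock_dbg() print every add/reserve/free
 * operation and enables the memblock_dump_all() output above.
 */
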
2074 #define NAME_SIZE 100
2075 struct memsize_rgn_struct {
2076 phys_addr_t base;
2077 long size;
2078 bool nomap; /* 1/32 byte */
2079 bool reusable; /* 1/32 byte */
2080 char name[NAME_SIZE]; /* 30/32 byte */
2081 };
2082
2083 #define MAX_MEMBLOCK_MEMSIZE 100
2084
2085 static struct memsize_rgn_struct memsize_rgn[MAX_MEMBLOCK_MEMSIZE] __initdata_memblock;
2086 static int memsize_rgn_count __initdata_memblock;
2087 static long memsize_memmap;
2088 static unsigned long memsize_code __initdata_memblock;
2089 static unsigned long memsize_data __initdata_memblock;
2090 static unsigned long memsize_ro __initdata_memblock;
2091 static unsigned long memsize_bss __initdata_memblock;
2092 static long memsize_reusable_size;
2093
2094 enum memblock_memsize_state {
2095 MEMBLOCK_MEMSIZE_NONE = 0,
2096 MEMBLOCK_MEMSIZE_DEBUGFS,
2097 MEMBLOCK_MEMSIZE_PROCFS,
2098 };
2099
2100 static enum memblock_memsize_state memsize_state __initdata_memblock = MEMBLOCK_MEMSIZE_NONE;
2101
2102 static int __init early_memblock_memsize(char *str)
2103 {
2104 if (!str)
2105 return -EINVAL;
2106 if (strcmp(str, "none") == 0)
2107 memsize_state = MEMBLOCK_MEMSIZE_NONE;
2108 else if (strcmp(str, "debugfs") == 0)
2109 memsize_state = MEMBLOCK_MEMSIZE_DEBUGFS;
2110 else if (strcmp(str, "procfs") == 0)
2111 memsize_state = MEMBLOCK_MEMSIZE_PROCFS;
2112 else
2113 return -EINVAL;
2114 return 0;
2115 }
2116 early_param("memblock_memsize", early_memblock_memsize);
2117
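/*
 * Example: this vendor extension is selected on the kernel command line.
 * "memblock_memsize=debugfs" exposes the report as
 * <debugfs>/memblock/memsize, "memblock_memsize=procfs" registers a
 * /proc/memsize entry, and "memblock_memsize=none" (the default) creates no
 * memsize file at all.
 */
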
2118 void __init memblock_memsize_enable_tracking(void)
2119 {
2120 memblock_memsize_tracking = true;
2121 }
2122
2123 void __init memblock_memsize_disable_tracking(void)
2124 {
2125 memblock_memsize_tracking = false;
2126 }
2127
2128 void __init memblock_memsize_mod_memmap_size(long size)
2129 {
2130 memsize_memmap += size;
2131 }
2132
2133 void memblock_memsize_mod_kernel_size(long size)
2134 {
2135 memsize_kinit += size;
2136 }
2137
2138 void __init memblock_memsize_kernel_code_data(unsigned long code, unsigned long data,
2139 unsigned long ro, unsigned long bss)
2140 {
2141 memsize_code = code;
2142 memsize_data = data;
2143 memsize_ro = ro;
2144 memsize_bss = bss;
2145 }
2146
2147 static void __init_memblock memsize_get_valid_name(char *valid_name, const char *name)
2148 {
2149 char *head, *tail, *found;
2150 int valid_size;
2151
2152 head = (char *)name;
2153 tail = head + strlen(name);
2154
2155 /* get tail position after valid char */
2156 found = strchr(name, '@');
2157 if (found)
2158 tail = found;
2159
2160 valid_size = tail - head + 1;
2161 if (valid_size > NAME_SIZE)
2162 valid_size = NAME_SIZE;
2163 strscpy(valid_name, head, valid_size);
2164 }
2165
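/*
 * Worked example: for a device-tree style name such as
 * "example_rmem@91000000" the '@' is treated as the end of the valid part,
 * so the stored name becomes "example_rmem"; names longer than
 * NAME_SIZE - 1 characters are truncated.
 */
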
2166 void memblock_memsize_mod_reusable_size(long size)
2167 {
2168 memsize_reusable_size += size;
2169 }
2170
2171 static inline struct memsize_rgn_struct * __init_memblock memsize_get_new_rgn(void)
2172 {
2173 if (memsize_rgn_count == ARRAY_SIZE(memsize_rgn)) {
2174 pr_err("not enough space on memsize_rgn\n");
2175 return NULL;
2176 }
2177 return &memsize_rgn[memsize_rgn_count++];
2178 }
2179
2180 static bool __init_memblock memsize_update_nomap_region(const char *name, phys_addr_t base,
2181 phys_addr_t size, bool nomap)
2182 {
2183 int i;
2184 struct memsize_rgn_struct *rmem_rgn, *new_rgn;
2185
2186 if (!name)
2187 return false;
2188
2189 for (i = 0; i < memsize_rgn_count; i++) {
2190 rmem_rgn = &memsize_rgn[i];
2191
2192 /* skip regions that are mapped, already named, or do not overlap */
2193 if (!rmem_rgn->nomap)
2194 continue;
2195 if (strcmp(rmem_rgn->name, "unknown"))
2196 continue;
2197 if (base + size <= rmem_rgn->base)
2198 continue;
2199 if (base >= rmem_rgn->base + rmem_rgn->size)
2200 continue;
2201
2202 /* exactly same */
2203 if (base == rmem_rgn->base && size == rmem_rgn->size) {
2204 memsize_get_valid_name(rmem_rgn->name, name);
2205 return true;
2206 }
2207
2208 /* bigger */
2209 if (base <= rmem_rgn->base &&
2210 base + size >= rmem_rgn->base + rmem_rgn->size) {
2211 memsize_get_valid_name(rmem_rgn->name, name);
2212 rmem_rgn->base = base;
2213 rmem_rgn->size = size;
2214 return true;
2215 }
2216
2217 /* intersect */
2218 if (base < rmem_rgn->base ||
2219 base + size > rmem_rgn->base + rmem_rgn->size) {
2220 new_rgn = memsize_get_new_rgn();
2221 if (!new_rgn)
2222 return true;
2223 new_rgn->base = base;
2224 new_rgn->size = size;
2225 new_rgn->nomap = nomap;
2226 new_rgn->reusable = false;
2227 memsize_get_valid_name(new_rgn->name, name);
2228
2229 if (base < rmem_rgn->base) {
2230 rmem_rgn->size -= base + size - rmem_rgn->base;
2231 rmem_rgn->base = base + size;
2232 } else {
2233 rmem_rgn->size -= rmem_rgn->base
2234 + rmem_rgn->size - base;
2235 }
2236 return true;
2237 }
2238
2239 /* smaller */
2240 new_rgn = memsize_get_new_rgn();
2241 if (!new_rgn)
2242 return true;
2243 new_rgn->base = base;
2244 new_rgn->size = size;
2245 new_rgn->nomap = nomap;
2246 new_rgn->reusable = false;
2247 memsize_get_valid_name(new_rgn->name, name);
2248
2249 if (base == rmem_rgn->base && size < rmem_rgn->size) {
2250 rmem_rgn->base = base + size;
2251 rmem_rgn->size -= size;
2252 } else if (base + size == rmem_rgn->base + rmem_rgn->size) {
2253 rmem_rgn->size -= size;
2254 } else {
2255 new_rgn = memsize_get_new_rgn();
2256 if (!new_rgn)
2257 return true;
2258 new_rgn->base = base + size;
2259 new_rgn->size = (rmem_rgn->base + rmem_rgn->size)
2260 - (base + size);
2261 new_rgn->nomap = nomap;
2262 new_rgn->reusable = false;
2263 strscpy(new_rgn->name, "unknown", sizeof(new_rgn->name));
2264 rmem_rgn->size = base - rmem_rgn->base;
2265 }
2266 return true;
2267 }
2268
2269 return false;
2270 }
2271
2272 void __init_memblock memblock_memsize_record(const char *name, phys_addr_t base,
2273 phys_addr_t size, bool nomap, bool reusable)
2274 {
2275 struct memsize_rgn_struct *rgn;
2276 phys_addr_t end;
2277
2278 if (name && memsize_state == MEMBLOCK_MEMSIZE_NONE)
2279 return;
2280
2281 if (memsize_rgn_count == MAX_MEMBLOCK_MEMSIZE) {
2282 pr_err("not enough space on memsize_rgn\n");
2283 return;
2284 }
2285
2286 if (memsize_update_nomap_region(name, base, size, nomap))
2287 return;
2288
2289 rgn = memsize_get_new_rgn();
2290 if (!rgn)
2291 return;
2292
2293 rgn->base = base;
2294 rgn->size = size;
2295 rgn->nomap = nomap;
2296 rgn->reusable = reusable;
2297
2298 if (!name)
2299 strscpy(rgn->name, "unknown", sizeof(rgn->name));
2300 else
2301 memsize_get_valid_name(rgn->name, name);
2302 end = base + size - 1;
2303 memblock_dbg("%s %pa..%pa nomap:%d reusable:%d\n",
2304 __func__, &base, &end, nomap, reusable);
2305 }
2306
2307 void __init memblock_memsize_detect_hole(void)
2308 {
2309 phys_addr_t base, end;
2310 phys_addr_t prev_end, hole_sz;
2311 int idx;
2312 struct memblock_region *rgn;
2313 int memblock_cnt = (int)memblock.memory.cnt;
2314
2315 /* assume that the hole size is less than 1 GB */
2316 for_each_memblock_type(idx, (&memblock.memory), rgn) {
2317 prev_end = (idx == 0) ? round_down(rgn->base, SZ_1G) : end;
2318 base = rgn->base;
2319 end = rgn->base + rgn->size;
2320
2321 /* only for the last region, check a hole after the region */
2322 if (idx + 1 == memblock_cnt) {
2323 hole_sz = round_up(end, SZ_1G) - end;
2324 if (hole_sz)
2325 memblock_memsize_record(NULL, end, hole_sz,
2326 true, false);
2327 }
2328
2329 /* for each region, check a hole prior to the region */
2330 hole_sz = base - prev_end;
2331 if (!hole_sz)
2332 continue;
2333 if (hole_sz < SZ_1G) {
2334 memblock_memsize_record(NULL, prev_end, hole_sz, true,
2335 false);
2336 } else {
2337 phys_addr_t hole_sz1, hole_sz2;
2338
2339 hole_sz1 = round_up(prev_end, SZ_1G) - prev_end;
2340 if (hole_sz1)
2341 memblock_memsize_record(NULL, prev_end,
2342 hole_sz1, true, false);
2343 hole_sz2 = base % SZ_1G;
2344 if (hole_sz2)
2345 memblock_memsize_record(NULL, base - hole_sz2,
2346 hole_sz2, true, false);
2347 }
2348 }
2349 }
2350
2351 static void __init free_memmap(unsigned long start_pfn, unsigned long end_pfn)
2352 {
2353 struct page *start_pg, *end_pg;
2354 phys_addr_t pg, pgend;
2355
2356 /*
2357 * Convert start_pfn/end_pfn to a struct page pointer.
2358 */
2359 start_pg = pfn_to_page(start_pfn - 1) + 1;
2360 end_pg = pfn_to_page(end_pfn - 1) + 1;
2361
2362 /*
2363 * Convert to physical addresses, and round start upwards and end
2364 * downwards.
2365 */
2366 pg = PAGE_ALIGN(__pa(start_pg));
2367 pgend = PAGE_ALIGN_DOWN(__pa(end_pg));
2368
2369 /*
2370 * If there are free pages between these, free the section of the
2371 * memmap array.
2372 */
2373 if (pg < pgend)
2374 memblock_phys_free(pg, pgend - pg);
2375 }
2376
2377 /*
2378 * The mem_map array can get very big. Free the unused area of the memory map.
2379 */
2380 static void __init free_unused_memmap(void)
2381 {
2382 unsigned long start, end, prev_end = 0;
2383 int i;
2384
2385 if (!IS_ENABLED(CONFIG_HAVE_ARCH_PFN_VALID) ||
2386 IS_ENABLED(CONFIG_SPARSEMEM_VMEMMAP))
2387 return;
2388
2389 /*
2390 * This relies on each bank being in address order.
2391 * The banks are sorted previously in bootmem_init().
2392 */
2393 for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, NULL) {
2394 #ifdef CONFIG_SPARSEMEM
2395 /*
2396 * Take care not to free memmap entries that don't exist
2397 * due to SPARSEMEM sections which aren't present.
2398 */
2399 start = min(start, ALIGN(prev_end, PAGES_PER_SECTION));
2400 #endif
2401 /*
2402 * Align down here since many operations in VM subsystem
2403 * presume that there are no holes in the memory map inside
2404 * a pageblock
2405 */
2406 start = pageblock_start_pfn(start);
2407
2408 /*
2409 * If we had a previous bank, and there is a space
2410 * between the current bank and the previous, free it.
2411 */
2412 if (prev_end && prev_end < start)
2413 free_memmap(prev_end, start);
2414
2415 /*
2416 * Align up here since many operations in VM subsystem
2417 * presume that there are no holes in the memory map inside
2418 * a pageblock
2419 */
2420 prev_end = pageblock_align(end);
2421 }
2422
2423 #ifdef CONFIG_SPARSEMEM
2424 if (!IS_ALIGNED(prev_end, PAGES_PER_SECTION)) {
2425 prev_end = pageblock_align(end);
2426 free_memmap(prev_end, ALIGN(prev_end, PAGES_PER_SECTION));
2427 }
2428 #endif
2429 }
2430
2431 static void __init __free_pages_memory(unsigned long start, unsigned long end)
2432 {
2433 int order;
2434
2435 while (start < end) {
2436 /*
2437 * Free the pages in the largest chunks alignment allows.
2438 *
2439 * __ffs() behaviour is undefined for 0. start == 0 is
2440 * MAX_PAGE_ORDER-aligned, set order to MAX_PAGE_ORDER for
2441 * the case.
2442 */
2443 if (start)
2444 order = min_t(int, MAX_PAGE_ORDER, __ffs(start));
2445 else
2446 order = MAX_PAGE_ORDER;
2447
2448 while (start + (1UL << order) > end)
2449 order--;
2450
2451 memblock_free_pages(pfn_to_page(start), start, order);
2452
2453 start += (1UL << order);
2454 }
2455 }
2456
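/*
 * Worked example: with start == pfn 0x2300 and a large range, __ffs(0x2300)
 * is 8, so blocks of order min(MAX_PAGE_ORDER, 8) == 8 (256 pages) are
 * released at a time; the order is lowered only when the block would run
 * past @end. A start of 0 is treated as MAX_PAGE_ORDER-aligned.
 */
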
2457 static unsigned long __init __free_memory_core(phys_addr_t start,
2458 phys_addr_t end)
2459 {
2460 unsigned long start_pfn = PFN_UP(start);
2461 unsigned long end_pfn = min_t(unsigned long,
2462 PFN_DOWN(end), max_low_pfn);
2463
2464 unsigned long start_align_up = PFN_ALIGN(start);
2465 unsigned long end_align_down = PFN_PHYS(end_pfn);
2466
2467 if (start_pfn >= end_pfn) {
2468 memblock_memsize_mod_kernel_size(end - start);
2469 } else {
2470 if (start_align_up > start)
2471 memblock_memsize_mod_kernel_size(start_align_up - start);
2472 if (end_pfn != max_low_pfn && end_align_down < end)
2473 memblock_memsize_mod_kernel_size(end - end_align_down);
2474 }
2475 if (start_pfn >= end_pfn)
2476 return 0;
2477
2478 __free_pages_memory(start_pfn, end_pfn);
2479
2480 return end_pfn - start_pfn;
2481 }
2482
2483 static void __init memmap_init_reserved_pages(void)
2484 {
2485 struct memblock_region *region;
2486 phys_addr_t start, end;
2487 int nid;
2488 unsigned long max_reserved;
2489
2490 /*
2491 * set nid on all reserved pages and also treat struct
2492 * pages for the NOMAP regions as PageReserved
2493 */
2494 repeat:
2495 max_reserved = memblock.reserved.max;
2496 for_each_mem_region(region) {
2497 nid = memblock_get_region_node(region);
2498 start = region->base;
2499 end = start + region->size;
2500
2501 if (memblock_is_nomap(region))
2502 reserve_bootmem_region(start, end, nid);
2503
2504 memblock_set_node(start, region->size, &memblock.reserved, nid);
2505 }
2506 /*
2507 * If 'max' has changed, memblock.reserved has doubled its array,
2508 * which may have added a new reserved region before the current
2509 * 'start'. Repeat the procedure to set its node id.
2510 */
2511 if (max_reserved != memblock.reserved.max)
2512 goto repeat;
2513
2514 /*
2515 * initialize struct pages for reserved regions that don't have
2516 * the MEMBLOCK_RSRV_NOINIT flag set
2517 */
2518 for_each_reserved_mem_region(region) {
2519 if (!memblock_is_reserved_noinit(region)) {
2520 nid = memblock_get_region_node(region);
2521 start = region->base;
2522 end = start + region->size;
2523
2524 if (!numa_valid_node(nid))
2525 nid = early_pfn_to_nid(PFN_DOWN(start));
2526
2527 reserve_bootmem_region(start, end, nid);
2528 }
2529 }
2530 }
2531
2532 static unsigned long __init free_low_memory_core_early(void)
2533 {
2534 unsigned long count = 0;
2535 phys_addr_t start, end;
2536 u64 i;
2537
2538 memblock_clear_hotplug(0, -1);
2539
2540 memmap_init_reserved_pages();
2541
2542 /*
2543 * We need to use NUMA_NO_NODE instead of NODE_DATA(0)->node_id
2544 * because in some cases, e.g. when Node0 has no RAM installed,
2545 * low RAM will be on Node1.
2546 */
2547 for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE, &start, &end,
2548 NULL)
2549 count += __free_memory_core(start, end);
2550
2551 return count;
2552 }
2553
2554 static int reset_managed_pages_done __initdata;
2555
2556 static void __init reset_node_managed_pages(pg_data_t *pgdat)
2557 {
2558 struct zone *z;
2559
2560 for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++)
2561 atomic_long_set(&z->managed_pages, 0);
2562 }
2563
2564 void __init reset_all_zones_managed_pages(void)
2565 {
2566 struct pglist_data *pgdat;
2567
2568 if (reset_managed_pages_done)
2569 return;
2570
2571 for_each_online_pgdat(pgdat)
2572 reset_node_managed_pages(pgdat);
2573
2574 reset_managed_pages_done = 1;
2575 }
2576
2577 /**
2578 * memblock_free_all - release free pages to the buddy allocator
2579 */
2580 void __init memblock_free_all(void)
2581 {
2582 unsigned long pages;
2583
2584 free_unused_memmap();
2585 reset_all_zones_managed_pages();
2586
2587 pages = free_low_memory_core_early();
2588 totalram_pages_add(pages);
2589
2590 memblock_memsize_disable_tracking();
2591 }
2592
2593 /* Keep a table to reserve named memory */
2594 #define RESERVE_MEM_MAX_ENTRIES 8
2595 #define RESERVE_MEM_NAME_SIZE 16
2596 struct reserve_mem_table {
2597 char name[RESERVE_MEM_NAME_SIZE];
2598 phys_addr_t start;
2599 phys_addr_t size;
2600 };
2601 static struct reserve_mem_table reserved_mem_table[RESERVE_MEM_MAX_ENTRIES];
2602 static int reserved_mem_count;
2603
2604 /* Add wildcard region with a lookup name */
2605 static void __init reserved_mem_add(phys_addr_t start, phys_addr_t size,
2606 const char *name)
2607 {
2608 struct reserve_mem_table *map;
2609
2610 map = &reserved_mem_table[reserved_mem_count++];
2611 map->start = start;
2612 map->size = size;
2613 strscpy(map->name, name);
2614 }
2615
2616 /**
2617 * reserve_mem_find_by_name - Find reserved memory region with a given name
2618 * @name: The name that is attached to a reserved memory region
2619 * @start: If found, holds the start address
2620 * @size: If found, holds the size of the region.
2621 *
2622 * @start and @size are only updated if @name is found.
2623 *
2624 * Returns: 1 if found or 0 if not found.
2625 */
2626 int reserve_mem_find_by_name(const char *name, phys_addr_t *start, phys_addr_t *size)
2627 {
2628 struct reserve_mem_table *map;
2629 int i;
2630
2631 for (i = 0; i < reserved_mem_count; i++) {
2632 map = &reserved_mem_table[i];
2633 if (!map->size)
2634 continue;
2635 if (strcmp(name, map->name) == 0) {
2636 *start = map->start;
2637 *size = map->size;
2638 return 1;
2639 }
2640 }
2641 return 0;
2642 }
2643 EXPORT_SYMBOL_GPL(reserve_mem_find_by_name);
2644
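/*
 * Illustrative usage sketch (not part of this file): a driver looking up a
 * region that was set aside with the reserve_mem= option (the region name
 * below is hypothetical):
 *
 *	phys_addr_t start, size;
 *
 *	if (reserve_mem_find_by_name("example_trace", &start, &size))
 *		pr_info("example_trace at %pa, %pa bytes\n", &start, &size);
 */
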
2645 /*
2646 * Parse reserve_mem=nn:align:name
2647 */
2648 static int __init reserve_mem(char *p)
2649 {
2650 phys_addr_t start, size, align, tmp;
2651 char *name;
2652 char *oldp;
2653 int len;
2654
2655 if (!p)
2656 return -EINVAL;
2657
2658 /* Check if there's room for more reserved memory */
2659 if (reserved_mem_count >= RESERVE_MEM_MAX_ENTRIES)
2660 return -EBUSY;
2661
2662 oldp = p;
2663 size = memparse(p, &p);
2664 if (!size || p == oldp)
2665 return -EINVAL;
2666
2667 if (*p != ':')
2668 return -EINVAL;
2669
2670 align = memparse(p+1, &p);
2671 if (*p != ':')
2672 return -EINVAL;
2673
2674 /*
2675 * memblock_phys_alloc() doesn't like a zero size align,
2676 * but it is OK for this command to have it.
2677 */
2678 if (align < SMP_CACHE_BYTES)
2679 align = SMP_CACHE_BYTES;
2680
2681 name = p + 1;
2682 len = strlen(name);
2683
2684 /* name needs to have length but not too big */
2685 if (!len || len >= RESERVE_MEM_NAME_SIZE)
2686 return -EINVAL;
2687
2688 /* Make sure that name has text */
2689 for (p = name; *p; p++) {
2690 if (!isspace(*p))
2691 break;
2692 }
2693 if (!*p)
2694 return -EINVAL;
2695
2696 /* Make sure the name is not already used */
2697 if (reserve_mem_find_by_name(name, &start, &tmp))
2698 return -EBUSY;
2699
2700 start = memblock_phys_alloc(size, align);
2701 if (!start)
2702 return -ENOMEM;
2703
2704 reserved_mem_add(start, size, name);
2705
2706 return 1;
2707 }
2708 __setup("reserve_mem=", reserve_mem);
2709
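/*
 * Example: "reserve_mem=2M:4096:example_trace" on the kernel command line
 * allocates a 2 MiB region aligned to 4096 bytes at boot and records it
 * under the name "example_trace", which callers can later retrieve with
 * reserve_mem_find_by_name(). Alignments below SMP_CACHE_BYTES are raised
 * to SMP_CACHE_BYTES, and at most RESERVE_MEM_MAX_ENTRIES (8) regions can
 * be named this way.
 */
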
2710 #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_ARCH_KEEP_MEMBLOCK)
2711 static const char * const flagname[] = {
2712 [ilog2(MEMBLOCK_HOTPLUG)] = "HOTPLUG",
2713 [ilog2(MEMBLOCK_MIRROR)] = "MIRROR",
2714 [ilog2(MEMBLOCK_NOMAP)] = "NOMAP",
2715 [ilog2(MEMBLOCK_DRIVER_MANAGED)] = "DRV_MNG",
2716 [ilog2(MEMBLOCK_RSRV_NOINIT)] = "RSV_NIT",
2717 };
2718
2719 static int memblock_debug_show(struct seq_file *m, void *private)
2720 {
2721 struct memblock_type *type = m->private;
2722 struct memblock_region *reg;
2723 int i, j, nid;
2724 unsigned int count = ARRAY_SIZE(flagname);
2725 phys_addr_t end;
2726
2727 for (i = 0; i < type->cnt; i++) {
2728 reg = &type->regions[i];
2729 end = reg->base + reg->size - 1;
2730 nid = memblock_get_region_node(reg);
2731
2732 seq_printf(m, "%4d: ", i);
2733 seq_printf(m, "%pa..%pa ", &reg->base, &end);
2734 if (numa_valid_node(nid))
2735 seq_printf(m, "%4d ", nid);
2736 else
2737 seq_printf(m, "%4c ", 'x');
2738 if (reg->flags) {
2739 for (j = 0; j < count; j++) {
2740 if (reg->flags & (1U << j)) {
2741 seq_printf(m, "%s\n", flagname[j]);
2742 break;
2743 }
2744 }
2745 if (j == count)
2746 seq_printf(m, "%s\n", "UNKNOWN");
2747 } else {
2748 seq_printf(m, "%s\n", "NONE");
2749 }
2750 }
2751 return 0;
2752 }
2753 DEFINE_SHOW_ATTRIBUTE(memblock_debug);
2754
2755 /* assume that freeing region is NOT bigger than the previous region */
2756 static void memblock_memsize_free(phys_addr_t free_base,
2757 phys_addr_t free_size)
2758 {
2759 int i;
2760 struct memsize_rgn_struct *rgn;
2761 phys_addr_t free_end, end;
2762
2763 free_end = free_base + free_size - 1;
2764 memblock_dbg("%s %pa..%pa\n",
2765 __func__, &free_base, &free_end);
2766
2767 for (i = 0; i < memsize_rgn_count; i++) {
2768 rgn = &memsize_rgn[i];
2769
2770 end = rgn->base + rgn->size;
2771 if (free_base < rgn->base ||
2772 free_base >= end)
2773 continue;
2774
2775 free_end = free_base + free_size;
2776 if (free_base == rgn->base) {
2777 rgn->size -= free_size;
2778 if (rgn->size != 0)
2779 rgn->base += free_size;
2780 } else if (free_end == end) {
2781 rgn->size -= free_size;
2782 } else {
2783 memblock_memsize_record(rgn->name, free_end,
2784 end - free_end, rgn->nomap, rgn->reusable);
2785 rgn->size = free_base - rgn->base;
2786 }
2787 }
2788 }
2789
2790 static int memsize_rgn_cmp(const void *a, const void *b)
2791 {
2792 const struct memsize_rgn_struct *ra = a, *rb = b;
2793
2794 if (ra->base > rb->base)
2795 return -1;
2796
2797 if (ra->base < rb->base)
2798 return 1;
2799
2800 return 0;
2801 }
2802
2803 /* assume that freed size is always 64 KB aligned */
2804 static inline void memblock_memsize_check_size(struct memsize_rgn_struct *rgn)
2805 {
2806 phys_addr_t phy, end, freed = 0;
2807 bool has_freed = false;
2808 struct page *page;
2809
2810 if (rgn->reusable || rgn->nomap)
2811 return;
2812
2813 phy = rgn->base;
2814 end = rgn->base + rgn->size;
2815 while (phy < end) {
2816 unsigned long pfn = __phys_to_pfn(phy);
2817
2818 if (!pfn_valid(pfn))
2819 return;
2820 page = pfn_to_page(pfn);
2821 if (!has_freed && !PageReserved(page)) {
2822 has_freed = true;
2823 freed = phy;
2824 } else if (has_freed && PageReserved(page)) {
2825 has_freed = false;
2826 memblock_memsize_free(freed, phy - freed);
2827 }
2828
2829 if (has_freed && (phy + SZ_64K >= end))
2830 memblock_memsize_free(freed, end - freed);
2831
2832 /* check the first page only */
2833 phy += SZ_64K;
2834 }
2835 }
2836
2837 static int memblock_memsize_show(struct seq_file *m, void *private)
2838 {
2839 int i;
2840 struct memsize_rgn_struct *rgn;
2841 unsigned long reserved = 0, reusable = 0, total;
2842 unsigned long system = totalram_pages() << PAGE_SHIFT;
2843 unsigned long etc;
2844
2845 etc = memsize_kinit;
2846 etc -= memsize_code + memsize_data + memsize_ro + memsize_bss +
2847 memsize_memmap;
2848
2849 system += memsize_reusable_size;
2850 sort(memsize_rgn, memsize_rgn_count,
2851 sizeof(memsize_rgn[0]), memsize_rgn_cmp, NULL);
2852 for (i = 0; i < memsize_rgn_count; i++) {
2853 phys_addr_t base, end;
2854 long size;
2855
2856 rgn = &memsize_rgn[i];
2857 memblock_memsize_check_size(rgn);
2858 base = rgn->base;
2859 size = rgn->size;
2860 end = base + size;
2861
2862 seq_printf(m, "0x%pK-0x%pK 0x%08lx ( %7lu KB ) %s %s %s\n",
2863 (void *)base, (void *)end,
2864 size, DIV_ROUND_UP(size, SZ_1K),
2865 rgn->nomap ? "nomap" : " map",
2866 rgn->reusable ? "reusable" : "unusable",
2867 rgn->name);
2868 if (rgn->reusable)
2869 reusable += (unsigned long)rgn->size;
2870 else
2871 reserved += (unsigned long)rgn->size;
2872 }
2873
2874 total = memsize_kinit + reserved + system;
2875
2876 seq_puts(m, "\n");
2877 seq_printf(m, "Reserved : %7lu KB\n",
2878 DIV_ROUND_UP(memsize_kinit + reserved, SZ_1K));
2879 seq_printf(m, " .kernel : %7lu KB\n",
2880 DIV_ROUND_UP(memsize_kinit, SZ_1K));
2881 seq_printf(m, " .text : %7lu KB\n"
2882 " .rwdata : %7lu KB\n"
2883 " .rodata : %7lu KB\n"
2884 " .bss : %7lu KB\n"
2885 " .memmap : %7lu KB\n"
2886 " .etc : %7lu KB\n",
2887 DIV_ROUND_UP(memsize_code, SZ_1K),
2888 DIV_ROUND_UP(memsize_data, SZ_1K),
2889 DIV_ROUND_UP(memsize_ro, SZ_1K),
2890 DIV_ROUND_UP(memsize_bss, SZ_1K),
2891 DIV_ROUND_UP(memsize_memmap, SZ_1K),
2892 DIV_ROUND_UP(etc, SZ_1K));
2893 seq_printf(m, " .unusable : %7lu KB\n",
2894 DIV_ROUND_UP(reserved, SZ_1K));
2895 seq_printf(m, "System : %7lu KB\n",
2896 DIV_ROUND_UP(system, SZ_1K));
2897 seq_printf(m, " .common : %7lu KB\n",
2898 DIV_ROUND_UP(system - reusable, SZ_1K));
2899 seq_printf(m, " .reusable : %7lu KB\n",
2900 DIV_ROUND_UP(reusable, SZ_1K));
2901 seq_printf(m, "Total : %7lu KB ( %5lu.%02lu MB )\n",
2902 DIV_ROUND_UP(total, SZ_1K),
2903 total >> 20, ((total % SZ_1M) * 100) >> 20);
2904 return 0;
2905 }
2906
2907 DEFINE_SHOW_ATTRIBUTE(memblock_memsize);
2908
2909 static int __init memblock_init_debugfs(void)
2910 {
2911 struct dentry *root = debugfs_create_dir("memblock", NULL);
2912
2913 debugfs_create_file("memory", 0444, root,
2914 &memblock.memory, &memblock_debug_fops);
2915 debugfs_create_file("reserved", 0444, root,
2916 &memblock.reserved, &memblock_debug_fops);
2917 #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
2918 debugfs_create_file("physmem", 0444, root, &physmem,
2919 &memblock_debug_fops);
2920 #endif
2921 if (memsize_state == MEMBLOCK_MEMSIZE_DEBUGFS)
2922 debugfs_create_file("memsize", 0444, root, NULL,
2923 &memblock_memsize_fops);
2924 else if (memsize_state == MEMBLOCK_MEMSIZE_PROCFS)
2925 proc_create_single("memsize", 0, NULL,
2926 memblock_memsize_show);
2927
2928 return 0;
2929 }
2930 __initcall(memblock_init_debugfs);
2931
2932 #endif /* CONFIG_DEBUG_FS */
2933