1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * linux/kernel/power/snapshot.c
4  *
5  * This file provides system snapshot/restore functionality for swsusp.
6  *
7  * Copyright (C) 1998-2005 Pavel Machek <pavel@ucw.cz>
8  * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
9  */
10 
11 #define pr_fmt(fmt) "PM: hibernation: " fmt
12 
13 #include <linux/version.h>
14 #include <linux/module.h>
15 #include <linux/mm.h>
16 #include <linux/suspend.h>
17 #include <linux/delay.h>
18 #include <linux/bitops.h>
19 #include <linux/spinlock.h>
20 #include <linux/kernel.h>
21 #include <linux/pm.h>
22 #include <linux/device.h>
23 #include <linux/init.h>
24 #include <linux/memblock.h>
25 #include <linux/nmi.h>
26 #include <linux/syscalls.h>
27 #include <linux/console.h>
28 #include <linux/highmem.h>
29 #include <linux/list.h>
30 #include <linux/slab.h>
31 #include <linux/compiler.h>
32 #include <linux/ktime.h>
33 #include <linux/set_memory.h>
34 
35 #include <linux/uaccess.h>
36 #include <asm/mmu_context.h>
37 #include <asm/tlbflush.h>
38 #include <asm/io.h>
39 
40 #include "power.h"
41 
42 #define SNAPSHOT_TWO 2
43 #define SNAPSHOT_FIVE 5
44 
45 #if defined(CONFIG_STRICT_KERNEL_RWX) && defined(CONFIG_ARCH_HAS_SET_MEMORY)
46 static bool hibernate_restore_protection;
47 static bool hibernate_restore_protection_active;
48 
49 void enable_restore_image_protection(void)
50 {
51     hibernate_restore_protection = true;
52 }
53 
54 static inline void hibernate_restore_protection_begin(void)
55 {
56     hibernate_restore_protection_active = hibernate_restore_protection;
57 }
58 
59 static inline void hibernate_restore_protection_end(void)
60 {
61     hibernate_restore_protection_active = false;
62 }
63 
64 static inline void hibernate_restore_protect_page(void *page_address)
65 {
66     if (hibernate_restore_protection_active) {
67         set_memory_ro((unsigned long)page_address, 1);
68     }
69 }
70 
71 static inline void hibernate_restore_unprotect_page(void *page_address)
72 {
73     if (hibernate_restore_protection_active) {
74         set_memory_rw((unsigned long)page_address, 1);
75     }
76 }
77 #else
78 static inline void hibernate_restore_protection_begin(void)
79 {
80 }
81 static inline void hibernate_restore_protection_end(void)
82 {
83 }
84 static inline void hibernate_restore_protect_page(void *page_address)
85 {
86 }
87 static inline void hibernate_restore_unprotect_page(void *page_address)
88 {
89 }
90 #endif /* CONFIG_STRICT_KERNEL_RWX  && CONFIG_ARCH_HAS_SET_MEMORY */
91 
92 static int swsusp_page_is_free(struct page *);
93 static void swsusp_set_page_forbidden(struct page *);
94 static void swsusp_unset_page_forbidden(struct page *);
95 
96 /*
97  * Number of bytes to reserve for memory allocations made by device drivers
98  * from their ->freeze() and ->freeze_noirq() callbacks so that they don't
99  * cause image creation to fail (tunable via /sys/power/reserved_size).
100  */
101 unsigned long reserved_size;
102 
103 void __init hibernate_reserved_size_init(void)
104 {
105     reserved_size = SPARE_PAGES * PAGE_SIZE;
106 }
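
/*
 * SPARE_PAGES comes from power.h; with its usual definition of one
 * megabyte's worth of page frames, the default reserved_size is 1 MiB
 * (256 pages when PAGE_SIZE is 4 KiB).
 */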
107 
108 /*
109  * Preferred image size in bytes (tunable via /sys/power/image_size).
110  * When it is set to N, swsusp will do its best to ensure the image
111  * size will not exceed N bytes, but if that is impossible, it will
112  * try to create the smallest image possible.
113  */
114 unsigned long image_size;
115 
116 void __init hibernate_image_size_init(void)
117 {
118     image_size = ((totalram_pages() * SNAPSHOT_TWO) / SNAPSHOT_FIVE) * PAGE_SIZE;
119 }
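
/*
 * Worked example (assuming 4 KiB pages): on a machine with 8 GiB of RAM
 * totalram_pages() is roughly 2097152, so the default image_size is about
 * ((2097152 * 2) / 5) * 4096 bytes, i.e. roughly 3.2 GiB, or two fifths
 * of RAM.
 */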
120 
121 /*
122  * List of PBEs needed for restoring the pages that were allocated before
123  * the suspend and included in the suspend image, but have also been
124  * allocated by the "resume" kernel, so their contents cannot be written
125  * directly to their "original" page frames.
126  */
127 struct pbe *restore_pblist;
128 
129 /* struct linked_page is used to build chains of pages */
130 
131 #define LINKED_PAGE_DATA_SIZE (PAGE_SIZE - sizeof(void *))
132 
133 struct linked_page {
134     struct linked_page *next;
135     char data[LINKED_PAGE_DATA_SIZE];
136 } __packed;
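
/*
 * For example, with 4 KiB pages and 8-byte pointers each linked_page
 * carries LINKED_PAGE_DATA_SIZE == 4096 - 8 == 4088 bytes of payload.
 */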
137 
138 /*
139  * List of "safe" pages (i.e. pages that were not used by the image kernel
140  * before hibernation) that may be used as temporary storage for image kernel
141  * memory contents.
142  */
143 static struct linked_page *safe_pages_list;
144 
145 /* Pointer to an auxiliary buffer (1 page) */
146 static void *buffer;
147 
148 #define PG_ANY 0
149 #define PG_SAFE 1
150 #define PG_UNSAFE_CLEAR 1
151 #define PG_UNSAFE_KEEP 0
152 
153 static unsigned int allocated_unsafe_pages;
154 
155 /**
156  * get_image_page - Allocate a page for a hibernation image.
157  * @gfp_mask: GFP mask for the allocation.
158  * @safe_needed: Get pages that were not used before hibernation (restore only)
159  *
160  * During image restoration, for storing the PBE list and the image data, we can
161  * only use memory pages that do not conflict with the pages used before
162  * hibernation.  The "unsafe" pages have PageNosaveFree set and we count them
163  * using allocated_unsafe_pages.
164  *
165  * Each allocated image page is marked as PageNosave and PageNosaveFree so that
166  * swsusp_free() can release it.
167  */
168 static void *get_image_page(gfp_t gfp_mask, int safe_needed)
169 {
170     void *res;
171 
172     res = (void *)get_zeroed_page(gfp_mask);
173     if (safe_needed) {
174         while (res && swsusp_page_is_free(virt_to_page(res))) {
175             /* The page is unsafe, mark it for swsusp_free() */
176             swsusp_set_page_forbidden(virt_to_page(res));
177             allocated_unsafe_pages++;
178             res = (void *)get_zeroed_page(gfp_mask);
179         }
180     }
181     if (res) {
182         swsusp_set_page_forbidden(virt_to_page(res));
183         swsusp_set_page_free(virt_to_page(res));
184     }
185     return res;
186 }
187 
188 static void *_get_safe_page(gfp_t gfp_mask)
189 {
190     if (safe_pages_list) {
191         void *ret = safe_pages_list;
192 
193         safe_pages_list = safe_pages_list->next;
194         memset(ret, 0, PAGE_SIZE);
195         return ret;
196     }
197     return get_image_page(gfp_mask, PG_SAFE);
198 }
199 
200 unsigned long get_safe_page(gfp_t gfp_mask)
201 {
202     return (unsigned long)_get_safe_page(gfp_mask);
203 }
204 
205 static struct page *alloc_image_page(gfp_t gfp_mask)
206 {
207     struct page *page;
208 
209     page = alloc_page(gfp_mask);
210     if (page) {
211         swsusp_set_page_forbidden(page);
212         swsusp_set_page_free(page);
213     }
214     return page;
215 }
216 
217 static void recycle_safe_page(void *page_address)
218 {
219     struct linked_page *lp = page_address;
220 
221     lp->next = safe_pages_list;
222     safe_pages_list = lp;
223 }
224 
225 /**
226  * free_image_page - Free a page allocated for hibernation image.
227  * @addr: Address of the page to free.
228  * @clear_nosave_free: If set, clear the PageNosaveFree bit for the page.
229  *
230  * The page to free should have been allocated by get_image_page() (page flags
231  * set by it are affected).
232  */
233 static inline void free_image_page(void *addr, int clear_nosave_free)
234 {
235     struct page *page;
236 
237     BUG_ON(!virt_addr_valid(addr));
238 
239     page = virt_to_page(addr);
240 
241     swsusp_unset_page_forbidden(page);
242     if (clear_nosave_free) {
243         swsusp_unset_page_free(page);
244     }
245 
246     __free_page(page);
247 }
248 
249 static inline void free_list_of_pages(struct linked_page *list, int clear_page_nosave)
250 {
251     while (list) {
252         struct linked_page *lp = list->next;
253 
254         free_image_page(list, clear_page_nosave);
255         list = lp;
256     }
257 }
258 
259 /*
260  * struct chain_allocator is used for allocating small objects out of
261  * a linked list of pages called 'the chain'.
262  *
263  * The chain grows each time when there is no room for a new object in
264  * the current page.  The allocated objects cannot be freed individually.
265  * It is only possible to free them all at once, by freeing the entire
266  * chain.
267  *
268  * NOTE: The chain allocator may be inefficient if the allocated objects
269  * are not much smaller than PAGE_SIZE.
270  */
271 struct chain_allocator {
272     struct linked_page *chain; /* the chain */
273     unsigned int used_space;   /* total size of objects allocated out
274                       of the current page */
275     gfp_t gfp_mask;            /* mask for allocating pages */
276     int safe_needed;           /* if set, only "safe" pages are allocated */
277 };
278 
279 static void chain_init(struct chain_allocator *ca, gfp_t gfp_mask, int safe_needed)
280 {
281     ca->chain = NULL;
282     ca->used_space = LINKED_PAGE_DATA_SIZE;
283     ca->gfp_mask = gfp_mask;
284     ca->safe_needed = safe_needed;
285 }
286 
287 static void *chain_alloc(struct chain_allocator *ca, unsigned int size)
288 {
289     void *ret;
290 
291     if (LINKED_PAGE_DATA_SIZE - ca->used_space < size) {
292         struct linked_page *lp;
293 
294         lp = ca->safe_needed ? _get_safe_page(ca->gfp_mask) : get_image_page(ca->gfp_mask, PG_ANY);
295         if (!lp) {
296             return NULL;
297         }
298 
299         lp->next = ca->chain;
300         ca->chain = lp;
301         ca->used_space = 0;
302     }
303     ret = ca->chain->data + ca->used_space;
304     ca->used_space += size;
305     return ret;
306 }
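
/*
 * Illustration: on a 64-bit build sizeof(struct rtree_node), defined below,
 * is 24 bytes (a two-pointer list_head plus a data pointer), so roughly 170
 * such objects are carved out of one chain page before chain_alloc() has to
 * link in a new linked_page.
 */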
307 
308 /**
309  * Data types related to memory bitmaps.
310  *
311  * Memory bitmap is a structure consisting of many linked lists of
312  * objects.  The main list's elements are of type struct zone_bitmap
313  * and each of them corresponds to one zone.  For each zone bitmap
314  * object there is a list of objects of type struct bm_block that
315  * represent blocks of the bitmap in which information is stored.
316  *
317  * struct memory_bitmap contains a pointer to the main list of zone
318  * bitmap objects, a struct bm_position used for browsing the bitmap,
319  * and a pointer to the list of pages used for allocating all of the
320  * zone bitmap objects and bitmap block objects.
321  *
322  * NOTE: It has to be possible to lay out the bitmap in memory
323  * using only allocations of order 0.  Additionally, the bitmap is
324  * designed to work with an arbitrary number of zones (this is over the
325  * top for now, but let's avoid making unnecessary assumptions ;-).
326  *
327  * struct zone_bitmap contains a pointer to a list of bitmap block
328  * objects and a pointer to the bitmap block object that has been
329  * most recently used for setting bits.  Additionally, it contains the
330  * PFNs that correspond to the start and end of the represented zone.
331  *
332  * struct bm_block contains a pointer to the memory page in which
333  * information is stored (in the form of a block of bitmap).
334  * It also contains the pfns that correspond to the start and end of
335  * the represented memory area.
336  *
337  * The memory bitmap is organized as a radix tree to guarantee fast random
338  * access to the bits. There is one radix tree for each zone (as returned
339  * from create_mem_extents).
340  *
341  * One radix tree is represented by one struct mem_zone_bm_rtree. There are
342  * two linked lists for the nodes of the tree, one for the inner nodes and
343  * one for the leaf nodes. The linked leaf nodes are used for fast linear
344  * access to the memory bitmap.
345  *
346  * The struct rtree_node represents one node of the radix tree.
347  */
348 
349 #define BM_END_OF_MAP (~0UL)
350 
351 #define BM_BITS_PER_BLOCK (PAGE_SIZE * BITS_PER_BYTE)
352 #define BM_BLOCK_SHIFT (PAGE_SHIFT + 3)
353 #define BM_BLOCK_MASK ((1UL << BM_BLOCK_SHIFT) - 1)
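
/*
 * Example: with 4 KiB pages (PAGE_SHIFT == 12) one bitmap block holds
 * 4096 * 8 == 32768 bits, i.e. it covers 32768 page frames or 128 MiB of
 * memory; BM_BLOCK_SHIFT is 15 and BM_BLOCK_MASK is 0x7fff.
 */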
354 
355 /*
356  * struct rtree_node is a wrapper struct to link the nodes
357  * of the rtree together for easy linear iteration over
358  * bits and easy freeing
359  */
360 struct rtree_node {
361     struct list_head list;
362     unsigned long *data;
363 };
364 
365 /*
366  * struct mem_zone_bm_rtree represents a bitmap used for one
367  * populated memory zone.
368  */
369 struct mem_zone_bm_rtree {
370     struct list_head list;    /* Link Zones together         */
371     struct list_head nodes;   /* Radix Tree inner nodes      */
372     struct list_head leaves;  /* Radix Tree leaves           */
373     unsigned long start_pfn;  /* Zone start page frame       */
374     unsigned long end_pfn;    /* Zone end page frame + 1     */
375     struct rtree_node *rtree; /* Radix Tree Root             */
376     int levels;               /* Number of Radix Tree Levels */
377     unsigned int blocks;      /* Number of Bitmap Blocks     */
378 };
379 
380 /* struct bm_position is used for browsing memory bitmaps */
381 
382 struct bm_position {
383     struct mem_zone_bm_rtree *zone;
384     struct rtree_node *node;
385     unsigned long node_pfn;
386     int node_bit;
387 };
388 
389 struct memory_bitmap {
390     struct list_head zones;
391     struct linked_page *p_list; /* list of pages used to store zone
392                     bitmap objects and bitmap block
393                     objects */
394     struct bm_position cur;     /* most recently used bit position */
395 };
396 
397 /* Functions that operate on memory bitmaps */
398 
399 #define BM_ENTRIES_PER_LEVEL (PAGE_SIZE / sizeof(unsigned long))
400 #if BITS_PER_LONG == 32
401 #define BM_RTREE_LEVEL_SHIFT (PAGE_SHIFT - 2)
402 #else
403 #define BM_RTREE_LEVEL_SHIFT (PAGE_SHIFT - 3)
404 #endif
405 #define BM_RTREE_LEVEL_MASK ((1UL << BM_RTREE_LEVEL_SHIFT) - 1)
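
/*
 * Example: on a 64-bit system with 4 KiB pages BM_ENTRIES_PER_LEVEL is
 * 4096 / 8 == 512 and BM_RTREE_LEVEL_SHIFT is 9, so every inner node fans
 * out to 512 children and a single-level tree can address 512 leaf blocks,
 * i.e. about 64 GiB of memory.
 */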
406 
407 /**
408  * alloc_rtree_node - Allocate a new node and add it to the radix tree.
409  *
410  * This function is used to allocate inner nodes as well as the
411  * leaf nodes of the radix tree. It also adds the node to the
412  * corresponding linked list passed in by the *list parameter.
413  */
414 static struct rtree_node *alloc_rtree_node(gfp_t gfp_mask, int safe_needed, struct chain_allocator *ca,
415                                            struct list_head *list)
416 {
417     struct rtree_node *node;
418 
419     node = chain_alloc(ca, sizeof(struct rtree_node));
420     if (!node) {
421         return NULL;
422     }
423 
424     node->data = get_image_page(gfp_mask, safe_needed);
425     if (!node->data) {
426         return NULL;
427     }
428 
429     list_add_tail(&node->list, list);
430 
431     return node;
432 }
433 
434 /**
435  * add_rtree_block - Add a new leave node to the radix tree.
436  *
437  * The leaf nodes need to be allocated in order to keep the leaves
438  * linked list in order. This is guaranteed by the zone->blocks
439  * counter.
440  */
441 static int add_rtree_block(struct mem_zone_bm_rtree *zone, gfp_t gfp_mask, int safe_needed, struct chain_allocator *ca)
442 {
443     struct rtree_node *node, *block, **dst;
444     unsigned int levels_needed, block_nr;
445     int i;
446 
447     block_nr = zone->blocks;
448     levels_needed = 0;
449 
450     /* How many levels do we need for this block nr? */
451     while (block_nr) {
452         levels_needed += 1;
453         block_nr >>= BM_RTREE_LEVEL_SHIFT;
454     }
455 
456     /* Make sure the rtree has enough levels */
457     for (i = zone->levels; i < levels_needed; i++) {
458         node = alloc_rtree_node(gfp_mask, safe_needed, ca, &zone->nodes);
459         if (!node) {
460             return -ENOMEM;
461         }
462 
463         node->data[0] = (unsigned long)zone->rtree;
464         zone->rtree = node;
465         zone->levels += 1;
466     }
467 
468     /* Allocate new block */
469     block = alloc_rtree_node(gfp_mask, safe_needed, ca, &zone->leaves);
470     if (!block) {
471         return -ENOMEM;
472     }
473 
474     /* Now walk the rtree to insert the block */
475     node = zone->rtree;
476     dst = &zone->rtree;
477     block_nr = zone->blocks;
478     for (i = zone->levels; i > 0; i--) {
479         int index;
480 
481         if (!node) {
482             node = alloc_rtree_node(gfp_mask, safe_needed, ca, &zone->nodes);
483             if (!node) {
484                 return -ENOMEM;
485             }
486             *dst = node;
487         }
488 
489         index = block_nr >> ((i - 1) * BM_RTREE_LEVEL_SHIFT);
490         index &= BM_RTREE_LEVEL_MASK;
491         dst = (struct rtree_node **)&((*dst)->data[index]);
492         node = *dst;
493     }
494 
495     zone->blocks += 1;
496     *dst = block;
497 
498     return 0;
499 }
500 
501 static void free_zone_bm_rtree(struct mem_zone_bm_rtree *zone, int clear_nosave_free);
502 
503 /**
504  * create_zone_bm_rtree - Create a radix tree for one zone.
505  *
506  * Allocates the mem_zone_bm_rtree structure and initializes it.
507  * This function also allocates and builds the radix tree for the
508  * zone.
509  */
510 static struct mem_zone_bm_rtree *create_zone_bm_rtree(gfp_t gfp_mask, int safe_needed, struct chain_allocator *ca,
511                                                       unsigned long start, unsigned long end)
512 {
513     struct mem_zone_bm_rtree *zone;
514     unsigned int i, nr_blocks;
515     unsigned long pages;
516 
517     pages = end - start;
518     zone = chain_alloc(ca, sizeof(struct mem_zone_bm_rtree));
519     if (!zone) {
520         return NULL;
521     }
522 
523     INIT_LIST_HEAD(&zone->nodes);
524     INIT_LIST_HEAD(&zone->leaves);
525     zone->start_pfn = start;
526     zone->end_pfn = end;
527     nr_blocks = DIV_ROUND_UP(pages, BM_BITS_PER_BLOCK);
528 
529     for (i = 0; i < nr_blocks; i++) {
530         if (add_rtree_block(zone, gfp_mask, safe_needed, ca)) {
531             free_zone_bm_rtree(zone, PG_UNSAFE_CLEAR);
532             return NULL;
533         }
534     }
535 
536     return zone;
537 }
538 
539 /**
540  * free_zone_bm_rtree - Free the memory of the radix tree.
541  *
542  * Free all node pages of the radix tree. The mem_zone_bm_rtree
543  * structure itself is not freed here nor are the rtree_node
544  * structs.
545  */
546 static void free_zone_bm_rtree(struct mem_zone_bm_rtree *zone, int clear_nosave_free)
547 {
548     struct rtree_node *node;
549 
550     list_for_each_entry(node, &zone->nodes, list) free_image_page(node->data, clear_nosave_free);
551 
552     list_for_each_entry(node, &zone->leaves, list) free_image_page(node->data, clear_nosave_free);
553 }
554 
555 static void memory_bm_position_reset(struct memory_bitmap *bm)
556 {
557     bm->cur.zone = list_entry(bm->zones.next, struct mem_zone_bm_rtree, list);
558     bm->cur.node = list_entry(bm->cur.zone->leaves.next, struct rtree_node, list);
559     bm->cur.node_pfn = 0;
560     bm->cur.node_bit = 0;
561 }
562 
563 static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free);
564 
565 struct mem_extent {
566     struct list_head hook;
567     unsigned long start;
568     unsigned long end;
569 };
570 
571 /**
572  * free_mem_extents - Free a list of memory extents.
573  * @list: List of extents to free.
574  */
575 static void free_mem_extents(struct list_head *list)
576 {
577     struct mem_extent *ext, *aux;
578 
579     list_for_each_entry_safe(ext, aux, list, hook)
580     {
581         list_del(&ext->hook);
582         kfree(ext);
583     }
584 }
585 
586 /**
587  * create_mem_extents - Create a list of memory extents.
588  * @list: List to put the extents into.
589  * @gfp_mask: Mask to use for memory allocations.
590  *
591  * The extents represent contiguous ranges of PFNs.
592  */
593 static int create_mem_extents(struct list_head *list, gfp_t gfp_mask)
594 {
595     struct zone *zone;
596 
597     INIT_LIST_HEAD(list);
598 
599     for_each_populated_zone(zone)
600     {
601         unsigned long zone_start, zone_end;
602         struct mem_extent *ext, *cur, *aux;
603 
604         zone_start = zone->zone_start_pfn;
605         zone_end = zone_end_pfn(zone);
606 
607         list_for_each_entry(ext, list, hook) if (zone_start <= ext->end) break;
608 
609         if (&ext->hook == list || zone_end < ext->start) {
610             /* New extent is necessary */
611             struct mem_extent *new_ext;
612 
613             new_ext = kzalloc(sizeof(struct mem_extent), gfp_mask);
614             if (!new_ext) {
615                 free_mem_extents(list);
616                 return -ENOMEM;
617             }
618             new_ext->start = zone_start;
619             new_ext->end = zone_end;
620             list_add_tail(&new_ext->hook, &ext->hook);
621             continue;
622         }
623 
624         /* Merge this zone's range of PFNs with the existing one */
625         if (zone_start < ext->start) {
626             ext->start = zone_start;
627         }
628         if (zone_end > ext->end) {
629             ext->end = zone_end;
630         }
631 
632         /* More merging may be possible */
633         cur = ext;
634         list_for_each_entry_safe_continue(cur, aux, list, hook)
635         {
636             if (zone_end < cur->start) {
637                 break;
638             }
639             if (zone_end < cur->end) {
640                 ext->end = cur->end;
641             }
642             list_del(&cur->hook);
643             kfree(cur);
644         }
645     }
646 
647     return 0;
648 }
649 
650 /**
651  * memory_bm_create - Allocate memory for a memory bitmap.
652  */
653 static int memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed)
654 {
655     struct chain_allocator ca;
656     struct list_head mem_extents;
657     struct mem_extent *ext;
658     int error;
659 
660     chain_init(&ca, gfp_mask, safe_needed);
661     INIT_LIST_HEAD(&bm->zones);
662 
663     error = create_mem_extents(&mem_extents, gfp_mask);
664     if (error) {
665         return error;
666     }
667 
668     list_for_each_entry(ext, &mem_extents, hook)
669     {
670         struct mem_zone_bm_rtree *zone;
671 
672         zone = create_zone_bm_rtree(gfp_mask, safe_needed, &ca, ext->start, ext->end);
673         if (!zone) {
674             error = -ENOMEM;
675             goto Error;
676         }
677         list_add_tail(&zone->list, &bm->zones);
678     }
679 
680     bm->p_list = ca.chain;
681     memory_bm_position_reset(bm);
682  Exit:
683     free_mem_extents(&mem_extents);
684     return error;
685 
686  Error:
687     bm->p_list = ca.chain;
688     memory_bm_free(bm, PG_UNSAFE_CLEAR);
689     goto Exit;
690 }
692 
693 /**
694  * memory_bm_free - Free memory occupied by the memory bitmap.
695  * @bm: Memory bitmap.
696  */
697 static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free)
698 {
699     struct mem_zone_bm_rtree *zone;
700 
701     list_for_each_entry(zone, &bm->zones, list) free_zone_bm_rtree(zone, clear_nosave_free);
702 
703     free_list_of_pages(bm->p_list, clear_nosave_free);
704 
705     INIT_LIST_HEAD(&bm->zones);
706 }
707 
708 /**
709  * memory_bm_find_bit - Find the bit for a given PFN in a memory bitmap.
710  *
711  * Find the bit in memory bitmap @bm that corresponds to the given PFN.
712  * The cur.zone, cur.block and cur.node_pfn members of @bm are updated.
713  *
714  * Walk the radix tree to find the page containing the bit that represents @pfn
715  * and return the position of the bit in @addr and @bit_nr.
716  */
717 static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn, void **addr, unsigned int *bit_nr)
718 {
719     struct mem_zone_bm_rtree *curr, *zone;
720     struct rtree_node *node;
721     int i, block_nr;
722 
723     zone = bm->cur.zone;
724 
725     if (pfn >= zone->start_pfn && pfn < zone->end_pfn) {
726         goto zone_found;
727     }
728 
729     zone = NULL;
730 
731     /* Find the right zone */
732     list_for_each_entry(curr, &bm->zones, list)
733     {
734         if (pfn >= curr->start_pfn && pfn < curr->end_pfn) {
735             zone = curr;
736             break;
737         }
738     }
739 
740     if (!zone) {
741         return -EFAULT;
742     }
743 
744 zone_found:
745     /*
746      * We have found the zone. Now walk the radix tree to find the leaf node
747      * for our PFN.
748      */
749 
750     /*
751      * If the zone we wish to scan is the current zone and the
752      * pfn falls into the current node then we do not need to walk
753      * the tree.
754      */
755     node = bm->cur.node;
756     if (zone == bm->cur.zone && ((pfn - zone->start_pfn) & ~BM_BLOCK_MASK) == bm->cur.node_pfn) {
757         goto node_found;
758     }
759 
760     node = zone->rtree;
761     block_nr = (pfn - zone->start_pfn) >> BM_BLOCK_SHIFT;
762 
763     for (i = zone->levels; i > 0; i--) {
764         int index;
765 
766         index = block_nr >> ((i - 1) * BM_RTREE_LEVEL_SHIFT);
767         index &= BM_RTREE_LEVEL_MASK;
768         BUG_ON(node->data[index] == 0);
769         node = (struct rtree_node *)node->data[index];
770     }
771 
772 node_found:
773     /* Update last position */
774     bm->cur.zone = zone;
775     bm->cur.node = node;
776     bm->cur.node_pfn = (pfn - zone->start_pfn) & ~BM_BLOCK_MASK;
777 
778     /* Set return values */
779     *addr = node->data;
780     *bit_nr = (pfn - zone->start_pfn) & BM_BLOCK_MASK;
781 
782     return 0;
783 }
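
/*
 * Worked example (4 KiB pages, a zone starting at PFN 0): pfn 100000 lies
 * in leaf block 100000 >> BM_BLOCK_SHIFT == 3, so with a one-level tree the
 * index into the root node is 3 & BM_RTREE_LEVEL_MASK == 3, and the bit
 * offset within the leaf is 100000 & BM_BLOCK_MASK == 1696.
 */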
784 
785 static void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn)
786 {
787     void *addr;
788     unsigned int bit;
789     int error;
790 
791     error = memory_bm_find_bit(bm, pfn, &addr, &bit);
792     BUG_ON(error);
793     set_bit(bit, addr);
794 }
795 
796 static int mem_bm_set_bit_check(struct memory_bitmap *bm, unsigned long pfn)
797 {
798     void *addr;
799     unsigned int bit;
800     int error;
801 
802     error = memory_bm_find_bit(bm, pfn, &addr, &bit);
803     if (!error) {
804         set_bit(bit, addr);
805     }
806 
807     return error;
808 }
809 
810 static void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn)
811 {
812     void *addr;
813     unsigned int bit;
814     int error;
815 
816     error = memory_bm_find_bit(bm, pfn, &addr, &bit);
817     BUG_ON(error);
818     clear_bit(bit, addr);
819 }
820 
821 static void memory_bm_clear_current(struct memory_bitmap *bm)
822 {
823     int bit;
824 
825     bit = max(bm->cur.node_bit - 1, 0);
826     clear_bit(bit, bm->cur.node->data);
827 }
828 
829 static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn)
830 {
831     void *addr;
832     unsigned int bit;
833     int error;
834 
835     error = memory_bm_find_bit(bm, pfn, &addr, &bit);
836     BUG_ON(error);
837     return test_bit(bit, addr);
838 }
839 
840 static bool memory_bm_pfn_present(struct memory_bitmap *bm, unsigned long pfn)
841 {
842     void *addr;
843     unsigned int bit;
844 
845     return !memory_bm_find_bit(bm, pfn, &addr, &bit);
846 }
847 
848 /*
849  * rtree_next_node - Jump to the next leaf node.
850  *
851  * Set the position to the beginning of the next node in the
852  * memory bitmap. This is either the next node in the current
853  * zone's radix tree or the first node in the radix tree of the
854  * next zone.
855  *
856  * Return true if there is a next node, false otherwise.
857  */
858 static bool rtree_next_node(struct memory_bitmap *bm)
859 {
860     if (!list_is_last(&bm->cur.node->list, &bm->cur.zone->leaves)) {
861         bm->cur.node = list_entry(bm->cur.node->list.next, struct rtree_node, list);
862         bm->cur.node_pfn += BM_BITS_PER_BLOCK;
863         bm->cur.node_bit = 0;
864         touch_softlockup_watchdog();
865         return true;
866     }
867 
868     /* No more nodes, goto next zone */
869     if (!list_is_last(&bm->cur.zone->list, &bm->zones)) {
870         bm->cur.zone = list_entry(bm->cur.zone->list.next, struct mem_zone_bm_rtree, list);
871         bm->cur.node = list_entry(bm->cur.zone->leaves.next, struct rtree_node, list);
872         bm->cur.node_pfn = 0;
873         bm->cur.node_bit = 0;
874         return true;
875     }
876 
877     /* No more zones */
878     return false;
879 }
880 
881 /**
882  * memory_bm_next_pfn - Find the next set bit in a memory bitmap.
883  * @bm: Memory bitmap.
884  *
885  * Starting from the last returned position this function searches for the next
886  * set bit in @bm and returns the PFN represented by it.  If no more bits are
887  * set, BM_END_OF_MAP is returned.
888  *
889  * It is required to run memory_bm_position_reset() before the first call to
890  * this function for the given memory bitmap.
891  */
892 static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm)
893 {
894     unsigned long bits, pfn, pages;
895     int bit;
896 
897     do {
898         pages = bm->cur.zone->end_pfn - bm->cur.zone->start_pfn;
899         bits = min(pages - bm->cur.node_pfn, BM_BITS_PER_BLOCK);
900         bit = find_next_bit(bm->cur.node->data, bits, bm->cur.node_bit);
901         if (bit < bits) {
902             pfn = bm->cur.zone->start_pfn + bm->cur.node_pfn + bit;
903             bm->cur.node_bit = bit + 1;
904             return pfn;
905         }
906     } while (rtree_next_node(bm));
907 
908     return BM_END_OF_MAP;
909 }
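
/*
 * The usual iteration pattern over a whole bitmap, as used for instance by
 * clear_or_poison_free_pages() below (do_something_with() stands for the
 * caller's per-page work):
 *
 *	memory_bm_position_reset(bm);
 *	for (pfn = memory_bm_next_pfn(bm); pfn != BM_END_OF_MAP;
 *	     pfn = memory_bm_next_pfn(bm))
 *		do_something_with(pfn);
 */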
910 
911 /*
912  * This structure represents a range of page frames the contents of which
913  * should not be saved during hibernation.
914  */
915 struct nosave_region {
916     struct list_head list;
917     unsigned long start_pfn;
918     unsigned long end_pfn;
919 };
920 
921 static LIST_HEAD(nosave_regions);
922 
923 static void recycle_zone_bm_rtree(struct mem_zone_bm_rtree *zone)
924 {
925     struct rtree_node *node;
926 
927     list_for_each_entry(node, &zone->nodes, list) recycle_safe_page(node->data);
928 
929     list_for_each_entry(node, &zone->leaves, list) recycle_safe_page(node->data);
930 }
931 
932 static void memory_bm_recycle(struct memory_bitmap *bm)
933 {
934     struct mem_zone_bm_rtree *zone;
935     struct linked_page *p_list;
936 
937     list_for_each_entry(zone, &bm->zones, list) recycle_zone_bm_rtree(zone);
938 
939     p_list = bm->p_list;
940     while (p_list) {
941         struct linked_page *lp = p_list;
942 
943         p_list = lp->next;
944         recycle_safe_page(lp);
945     }
946 }
947 
948 /**
949  * register_nosave_region - Register a region of unsaveable memory.
950  *
951  * Register a range of page frames the contents of which should not be saved
952  * during hibernation (to be used in the early initialization code).
953  */
954 void __init register_nosave_region(unsigned long start_pfn, unsigned long end_pfn)
955 {
956     struct nosave_region *region;
957 
958     if (start_pfn >= end_pfn) {
959         return;
960     }
961 
962     if (!list_empty(&nosave_regions)) {
963         /* Try to extend the previous region (they should be sorted) */
964         region = list_entry(nosave_regions.prev,
965                     struct nosave_region, list);
966         if (region->end_pfn == start_pfn) {
967             region->end_pfn = end_pfn;
968             goto Report;
969         }
970     }
971     /* This allocation cannot fail */
972     region = memblock_alloc(sizeof(struct nosave_region),
973                 SMP_CACHE_BYTES);
974     if (!region)
975         panic("%s: Failed to allocate %zu bytes\n", __func__,
976               sizeof(struct nosave_region));
977     region->start_pfn = start_pfn;
978     region->end_pfn = end_pfn;
979     list_add_tail(&region->list, &nosave_regions);
980  Report:
981     pr_info("Registered nosave memory: [mem %#010llx-%#010llx]\n",
982         (unsigned long long) start_pfn << PAGE_SHIFT,
983         ((unsigned long long) end_pfn << PAGE_SHIFT) - 1);
984 }
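
/*
 * For example, the x86 e820 parsing code registers gaps and non-RAM ranges
 * in the firmware memory map this way so that they are left out of the
 * hibernation image.
 */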
985 
986 /*
987  * Set bits in this map correspond to the page frames the contents of which
988  * should not be saved during the suspend.
989  */
990 static struct memory_bitmap *forbidden_pages_map;
991 
992 /* Set bits in this map correspond to free page frames. */
993 static struct memory_bitmap *free_pages_map;
994 
995 /*
996  * Each page frame allocated for creating the image is marked by setting the
997  * corresponding bits in forbidden_pages_map and free_pages_map simultaneously
998  */
999 
1000 void swsusp_set_page_free(struct page *page)
1001 {
1002     if (free_pages_map) {
1003         memory_bm_set_bit(free_pages_map, page_to_pfn(page));
1004     }
1005 }
1006 
1007 static int swsusp_page_is_free(struct page *page)
1008 {
1009     return free_pages_map ? memory_bm_test_bit(free_pages_map, page_to_pfn(page)) : 0;
1010 }
1011 
1012 void swsusp_unset_page_free(struct page *page)
1013 {
1014     if (free_pages_map) {
1015         memory_bm_clear_bit(free_pages_map, page_to_pfn(page));
1016     }
1017 }
1018 
1019 static void swsusp_set_page_forbidden(struct page *page)
1020 {
1021     if (forbidden_pages_map) {
1022         memory_bm_set_bit(forbidden_pages_map, page_to_pfn(page));
1023     }
1024 }
1025 
1026 int swsusp_page_is_forbidden(struct page *page)
1027 {
1028     return forbidden_pages_map ? memory_bm_test_bit(forbidden_pages_map, page_to_pfn(page)) : 0;
1029 }
1030 
1031 static void swsusp_unset_page_forbidden(struct page *page)
1032 {
1033     if (forbidden_pages_map) {
1034         memory_bm_clear_bit(forbidden_pages_map, page_to_pfn(page));
1035     }
1036 }
1037 
1038 /**
1039  * mark_nosave_pages - Mark pages that should not be saved.
1040  * @bm: Memory bitmap.
1041  *
1042  * Set the bits in @bm that correspond to the page frames the contents of which
1043  * should not be saved.
1044  */
1045 static void mark_nosave_pages(struct memory_bitmap *bm)
1046 {
1047     struct nosave_region *region;
1048 
1049     if (list_empty(&nosave_regions)) {
1050         return;
1051     }
1052 
1053     list_for_each_entry(region, &nosave_regions, list)
1054     {
1055         unsigned long pfn;
1056 
1057         pr_debug("Marking nosave pages: [mem %#010llx-%#010llx]\n", (unsigned long long)region->start_pfn << PAGE_SHIFT,
1058                  ((unsigned long long)region->end_pfn << PAGE_SHIFT) - 1);
1059 
1060         for (pfn = region->start_pfn; pfn < region->end_pfn; pfn++) {
1061             if (pfn_valid(pfn)) {
1062                 /*
1063                  * It is safe to ignore the result of
1064                  * mem_bm_set_bit_check() here, since we won't
1065                  * touch the PFNs for which the error is
1066                  * returned anyway.
1067                  */
1068                 mem_bm_set_bit_check(bm, pfn);
1069             }
1070         }
1071     }
1072 }
1073 
1074 /**
1075  * create_basic_memory_bitmaps - Create bitmaps to hold basic page information.
1076  *
1077  * Create bitmaps needed for marking page frames that should not be saved and
1078  * free page frames.  The forbidden_pages_map and free_pages_map pointers are
1079  * only modified if everything goes well, because we don't want the bits to be
1080  * touched before both bitmaps are set up.
1081  */
1082 int create_basic_memory_bitmaps(void)
1083 {
1084     struct memory_bitmap *bm1, *bm2;
1085     int error = 0;
1086 
1087     if (forbidden_pages_map && free_pages_map) {
1088         return 0;
1089     } else {
1090         BUG_ON(forbidden_pages_map || free_pages_map);
1091     }
1092 
1093     bm1 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL);
1094     if (!bm1) {
1095         return -ENOMEM;
1096     }
1097 
1098     error = memory_bm_create(bm1, GFP_KERNEL, PG_ANY);
1099     if (error) {
1100         goto Free_first_object;
1101     }
1102 
1103     bm2 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL);
1104     if (!bm2) {
1105         goto Free_first_bitmap;
1106     }
1107 
1108     error = memory_bm_create(bm2, GFP_KERNEL, PG_ANY);
1109     if (error) {
1110         goto Free_second_object;
1111     }
1112 
1113     forbidden_pages_map = bm1;
1114     free_pages_map = bm2;
1115     mark_nosave_pages(forbidden_pages_map);
1116 
1117     pr_debug("Basic memory bitmaps created\n");
1118 
1119     return 0;
1120 
1121 Free_second_object:
1122     kfree(bm2);
1123 Free_first_bitmap:
1124     memory_bm_free(bm1, PG_UNSAFE_CLEAR);
1125 Free_first_object:
1126     kfree(bm1);
1127     return -ENOMEM;
1128 }
1129 
1130 /**
1131  * free_basic_memory_bitmaps - Free memory bitmaps holding basic information.
1132  *
1133  * Free memory bitmaps allocated by create_basic_memory_bitmaps().  The
1134  * auxiliary pointers are necessary so that the bitmaps themselves are not
1135  * referred to while they are being freed.
1136  */
1137 void free_basic_memory_bitmaps(void)
1138 {
1139     struct memory_bitmap *bm1, *bm2;
1140 
1141     if (WARN_ON(!(forbidden_pages_map && free_pages_map))) {
1142         return;
1143     }
1144 
1145     bm1 = forbidden_pages_map;
1146     bm2 = free_pages_map;
1147     forbidden_pages_map = NULL;
1148     free_pages_map = NULL;
1149     memory_bm_free(bm1, PG_UNSAFE_CLEAR);
1150     kfree(bm1);
1151     memory_bm_free(bm2, PG_UNSAFE_CLEAR);
1152     kfree(bm2);
1153 
1154     pr_debug("Basic memory bitmaps freed\n");
1155 }
1156 
1157 static void clear_or_poison_free_page(struct page *page)
1158 {
1159     if (page_poisoning_enabled_static()) {
1160         _kernel_poison_pages(page, 1);
1161     } else if (want_init_on_free()) {
1162         clear_highpage(page);
1163     }
1164 }
1165 
1166 void clear_or_poison_free_pages(void)
1167 {
1168     struct memory_bitmap *bm = free_pages_map;
1169     unsigned long pfn;
1170 
1171     if (WARN_ON(!(free_pages_map))) {
1172         return;
1173     }
1174 
1175     if (page_poisoning_enabled() || want_init_on_free()) {
1176         memory_bm_position_reset(bm);
1177         pfn = memory_bm_next_pfn(bm);
1178         while (pfn != BM_END_OF_MAP) {
1179             if (pfn_valid(pfn)) {
1180                 clear_or_poison_free_page(pfn_to_page(pfn));
1181             }
1182 
1183             pfn = memory_bm_next_pfn(bm);
1184         }
1185         memory_bm_position_reset(bm);
1186         pr_info("free pages cleared after restore\n");
1187     }
1188 }
1189 
1190 /**
1191  * snapshot_additional_pages - Estimate the number of extra pages needed.
1192  * @zone: Memory zone to carry out the computation for.
1193  *
1194  * Estimate the number of additional pages needed for setting up the hibernation
1195  * image data structures for @zone (usually, the returned value is greater than
1196  * the exact number).
1197  */
1198 unsigned int snapshot_additional_pages(struct zone *zone)
1199 {
1200     unsigned int rtree, nodes;
1201 
1202     rtree = nodes = DIV_ROUND_UP(zone->spanned_pages, BM_BITS_PER_BLOCK);
1203     rtree += DIV_ROUND_UP(rtree * sizeof(struct rtree_node), LINKED_PAGE_DATA_SIZE);
1204     while (nodes > 1) {
1205         nodes = DIV_ROUND_UP(nodes, BM_ENTRIES_PER_LEVEL);
1206         rtree += nodes;
1207     }
1208 
1209     return 2 * rtree;
1210 }
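
/*
 * Worked example (4 KiB pages, 64-bit): a zone spanning 4 GiB has 1048576
 * page frames, i.e. 32 bitmap blocks.  The rtree_node bookkeeping adds
 * DIV_ROUND_UP(32 * 24, 4088) == 1 page and one inner node is needed on top
 * of the 32 leaves, so one bitmap costs 34 pages and the function returns 68.
 */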
1211 
1212 #ifdef CONFIG_HIGHMEM
1213 /**
1214  * count_free_highmem_pages - Compute the total number of free highmem pages.
1215  *
1216  * The returned number is system-wide.
1217  */
1218 static unsigned int count_free_highmem_pages(void)
1219 {
1220     struct zone *zone;
1221     unsigned int cnt = 0;
1222 
1223     for_each_populated_zone(zone) if (is_highmem(zone)) cnt += zone_page_state(zone, NR_FREE_PAGES);
1224 
1225     return cnt;
1226 }
1227 
1228 /**
1229  * saveable_highmem_page - Check if a highmem page is saveable.
1230  *
1231  * Determine whether a highmem page should be included in a hibernation image.
1232  *
1233  * We should save the page if it isn't Nosave or NosaveFree, or Reserved,
1234  * and it isn't part of a free chunk of pages.
1235  */
1236 static struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn)
1237 {
1238     struct page *page;
1239 
1240     if (!pfn_valid(pfn)) {
1241         return NULL;
1242     }
1243 
1244     page = pfn_to_online_page(pfn);
1245     if (!page || page_zone(page) != zone) {
1246         return NULL;
1247     }
1248 
1249     BUG_ON(!PageHighMem(page));
1250 
1251     if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page)) {
1252         return NULL;
1253     }
1254 
1255     if (PageReserved(page) || PageOffline(page)) {
1256         return NULL;
1257     }
1258 
1259     if (page_is_guard(page)) {
1260         return NULL;
1261     }
1262 
1263     return page;
1264 }
1265 
1266 /**
1267  * count_highmem_pages - Compute the total number of saveable highmem pages.
1268  */
1269 static unsigned int count_highmem_pages(void)
1270 {
1271     struct zone *zone;
1272     unsigned int n = 0;
1273 
1274     for_each_populated_zone(zone)
1275     {
1276         unsigned long pfn, max_zone_pfn;
1277 
1278         if (!is_highmem(zone)) {
1279             continue;
1280         }
1281 
1282         mark_free_pages(zone);
1283         max_zone_pfn = zone_end_pfn(zone);
1284         for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) {
1285             if (saveable_highmem_page(zone, pfn)) {
1286                 n++;
1287             }
1288         }
1289     }
1290     return n;
1291 }
1292 #else
1293 static inline void *saveable_highmem_page(struct zone *z, unsigned long p)
1294 {
1295     return NULL;
1296 }
1297 #endif /* CONFIG_HIGHMEM */
1298 
1299 /**
1300  * saveable_page - Check if the given page is saveable.
1301  *
1302  * Determine whether a non-highmem page should be included in a hibernation
1303  * image.
1304  *
1305  * We should save the page if it isn't Nosave, and is not in the range
1306  * of pages statically defined as 'unsaveable', and it isn't part of
1307  * a free chunk of pages.
1308  */
1309 static struct page *saveable_page(struct zone *zone, unsigned long pfn)
1310 {
1311     struct page *page;
1312 
1313     if (!pfn_valid(pfn)) {
1314         return NULL;
1315     }
1316 
1317     page = pfn_to_online_page(pfn);
1318     if (!page || page_zone(page) != zone) {
1319         return NULL;
1320     }
1321 
1322     BUG_ON(PageHighMem(page));
1323 
1324     if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page)) {
1325         return NULL;
1326     }
1327 
1328     if (PageOffline(page)) {
1329         return NULL;
1330     }
1331 
1332     if (PageReserved(page) && (!kernel_page_present(page) || pfn_is_nosave(pfn))) {
1333         return NULL;
1334     }
1335 
1336     if (page_is_guard(page)) {
1337         return NULL;
1338     }
1339 
1340     return page;
1341 }
1342 
1343 /**
1344  * count_data_pages - Compute the total number of saveable non-highmem pages.
1345  */
1346 static unsigned int count_data_pages(void)
1347 {
1348     struct zone *zone;
1349     unsigned long pfn, max_zone_pfn;
1350     unsigned int n = 0;
1351 
1352     for_each_populated_zone(zone)
1353     {
1354         if (is_highmem(zone)) {
1355             continue;
1356         }
1357 
1358         mark_free_pages(zone);
1359         max_zone_pfn = zone_end_pfn(zone);
1360         for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) {
1361             if (saveable_page(zone, pfn)) {
1362                 n++;
1363             }
1364         }
1365     }
1366     return n;
1367 }
1368 
1369 /*
1370  * This is needed because copy_page and memcpy are not usable for copying
1371  * task structs.
1372  */
1373 static inline void do_copy_page(long *dst, long *src)
1374 {
1375     int n;
1376 
1377     for (n = PAGE_SIZE / sizeof(long); n; n--) {
1378         *dst++ = *src++;
1379     }
1380 }
1381 
1382 /**
1383  * safe_copy_page - Copy a page in a safe way.
1384  *
1385  * Check if the page we are going to copy is marked as present in the kernel
1386  * page tables. This always is the case if CONFIG_DEBUG_PAGEALLOC or
1387  * CONFIG_ARCH_HAS_SET_DIRECT_MAP is not set. In that case kernel_page_present()
1388  * always returns 'true'.
1389  */
1390 static void safe_copy_page(void *dst, struct page *s_page)
1391 {
1392     if (kernel_page_present(s_page)) {
1393         do_copy_page(dst, page_address(s_page));
1394     } else {
1395         kernel_map_pages(s_page, 1, 1);
1396         do_copy_page(dst, page_address(s_page));
1397         kernel_map_pages(s_page, 1, 0);
1398     }
1399 }
1400 
1401 #ifdef CONFIG_HIGHMEM
1402 static inline struct page *page_is_saveable(struct zone *zone, unsigned long pfn)
1403 {
1404     return is_highmem(zone) ? saveable_highmem_page(zone, pfn) : saveable_page(zone, pfn);
1405 }
1406 
1407 static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
1408 {
1409     struct page *s_page, *d_page;
1410     void *src, *dst;
1411 
1412     s_page = pfn_to_page(src_pfn);
1413     d_page = pfn_to_page(dst_pfn);
1414     if (PageHighMem(s_page)) {
1415         src = kmap_atomic(s_page);
1416         dst = kmap_atomic(d_page);
1417         do_copy_page(dst, src);
1418         kunmap_atomic(dst);
1419         kunmap_atomic(src);
1420     } else {
1421         if (PageHighMem(d_page)) {
1422             /*
1423              * The page pointed to by src may contain some kernel
1424              * data modified by kmap_atomic()
1425              */
1426             safe_copy_page(buffer, s_page);
1427             dst = kmap_atomic(d_page);
1428             copy_page(dst, buffer);
1429             kunmap_atomic(dst);
1430         } else {
1431             safe_copy_page(page_address(d_page), s_page);
1432         }
1433     }
1434 }
1435 #else
1436 #define page_is_saveable(zone, pfn) saveable_page(zone, pfn)
1437 
1438 static inline void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
1439 {
1440     safe_copy_page(page_address(pfn_to_page(dst_pfn)), pfn_to_page(src_pfn));
1441 }
1442 #endif /* CONFIG_HIGHMEM */
1443 
1444 static void copy_data_pages(struct memory_bitmap *copy_bm, struct memory_bitmap *orig_bm)
1445 {
1446     struct zone *zone;
1447     unsigned long pfn;
1448 
1449     for_each_populated_zone(zone)
1450     {
1451         unsigned long max_zone_pfn;
1452 
1453         mark_free_pages(zone);
1454         max_zone_pfn = zone_end_pfn(zone);
1455         for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) {
1456             if (page_is_saveable(zone, pfn)) {
1457                 memory_bm_set_bit(orig_bm, pfn);
1458             }
1459         }
1460     }
1461     memory_bm_position_reset(orig_bm);
1462     memory_bm_position_reset(copy_bm);
1463     for (;;) {
1464         pfn = memory_bm_next_pfn(orig_bm);
1465         if (unlikely(pfn == BM_END_OF_MAP)) {
1466             break;
1467         }
1468         copy_data_page(memory_bm_next_pfn(copy_bm), pfn);
1469     }
1470 }
1471 
1472 /* Total number of image pages */
1473 static unsigned int nr_copy_pages;
1474 /* Number of pages needed for saving the original pfns of the image pages */
1475 static unsigned int nr_meta_pages;
1476 /*
1477  * Numbers of normal and highmem page frames allocated for hibernation image
1478  * before suspending devices.
1479  */
1480 static unsigned int alloc_normal, alloc_highmem;
1481 /*
1482  * Memory bitmap used for marking saveable pages (during hibernation) or
1483  * hibernation image pages (during restore)
1484  */
1485 static struct memory_bitmap orig_bm;
1486 /*
1487  * Memory bitmap used during hibernation for marking allocated page frames that
1488  * will contain copies of saveable pages.  During restore it is initially used
1489  * for marking hibernation image pages, but then the set bits from it are
1490  * duplicated in @orig_bm and it is released.  On highmem systems it is next
1491  * used for marking "safe" highmem pages, but it has to be reinitialized for
1492  * this purpose.
1493  */
1494 static struct memory_bitmap copy_bm;
1495 
1496 /**
1497  * swsusp_free - Free pages allocated for hibernation image.
1498  *
1499  * Image pages are allocated before snapshot creation, so they need to be
1500  * released after resume.
1501  */
1502 void swsusp_free(void)
1503 {
1504     unsigned long fb_pfn, fr_pfn;
1505 
1506     if (!forbidden_pages_map || !free_pages_map) {
1507         goto out;
1508     }
1509 
1510     memory_bm_position_reset(forbidden_pages_map);
1511     memory_bm_position_reset(free_pages_map);
1512 
1513     while (1) {
1514         fr_pfn = memory_bm_next_pfn(free_pages_map);
1515         fb_pfn = memory_bm_next_pfn(forbidden_pages_map);
1516 
1517         /*
1518          * Find the next bit set in both bitmaps. This is guaranteed to
1519          * terminate when fb_pfn == fr_pfn == BM_END_OF_MAP.
1520          */
1521         do {
1522             if (fb_pfn < fr_pfn) {
1523                 fb_pfn = memory_bm_next_pfn(forbidden_pages_map);
1524             }
1525             if (fr_pfn < fb_pfn) {
1526                 fr_pfn = memory_bm_next_pfn(free_pages_map);
1527             }
1528         } while (fb_pfn != fr_pfn);
1529 
1530         if (fr_pfn != BM_END_OF_MAP && pfn_valid(fr_pfn)) {
1531             struct page *page = pfn_to_page(fr_pfn);
1532 
1533             memory_bm_clear_current(forbidden_pages_map);
1534             memory_bm_clear_current(free_pages_map);
1535             hibernate_restore_unprotect_page(page_address(page));
1536             __free_page(page);
1537             continue;
1538         }
1539         break;
1540     }
1541 
1542 out:
1543     nr_copy_pages = 0;
1544     nr_meta_pages = 0;
1545     restore_pblist = NULL;
1546     buffer = NULL;
1547     alloc_normal = 0;
1548     alloc_highmem = 0;
1549     hibernate_restore_protection_end();
1550 }
1551 
1552 /* Helper functions used for the shrinking of memory. */
1553 
1554 #define GFP_IMAGE (GFP_KERNEL | __GFP_NOWARN)
1555 
1556 /**
1557  * preallocate_image_pages - Allocate a number of pages for hibernation image.
1558  * @nr_pages: Number of page frames to allocate.
1559  * @mask: GFP flags to use for the allocation.
1560  *
1561  * Return value: Number of page frames actually allocated
1562  */
1563 static unsigned long preallocate_image_pages(unsigned long nr_pages, gfp_t mask)
1564 {
1565     unsigned long nr_alloc = 0;
1566 
1567     while (nr_pages > 0) {
1568         struct page *page;
1569 
1570         page = alloc_image_page(mask);
1571         if (!page) {
1572             break;
1573         }
1574         memory_bm_set_bit(&copy_bm, page_to_pfn(page));
1575         if (PageHighMem(page)) {
1576             alloc_highmem++;
1577         } else {
1578             alloc_normal++;
1579         }
1580         nr_pages--;
1581         nr_alloc++;
1582     }
1583 
1584     return nr_alloc;
1585 }
1586 
1587 static unsigned long preallocate_image_memory(unsigned long nr_pages, unsigned long avail_normal)
1588 {
1589     unsigned long alloc;
1590 
1591     if (avail_normal <= alloc_normal) {
1592         return 0;
1593     }
1594 
1595     alloc = avail_normal - alloc_normal;
1596     if (nr_pages < alloc) {
1597         alloc = nr_pages;
1598     }
1599 
1600     return preallocate_image_pages(alloc, GFP_IMAGE);
1601 }
1602 
1603 #ifdef CONFIG_HIGHMEM
1604 static unsigned long preallocate_image_highmem(unsigned long nr_pages)
1605 {
1606     return preallocate_image_pages(nr_pages, GFP_IMAGE | __GFP_HIGHMEM);
1607 }
1608 
1609 /**
1610  *  _fraction - Compute (an approximation of) x * (multiplier / base).
1611  */
1612 static unsigned long _fraction(u64 x, u64 multiplier, u64 base)
1613 {
1614     return div64_u64(x * multiplier, base);
1615 }
1616 
1617 static unsigned long preallocate_highmem_fraction(unsigned long nr_pages, unsigned long highmem, unsigned long total)
1618 {
1619     unsigned long alloc = _fraction(nr_pages, highmem, total);
1620 
1621     return preallocate_image_pages(alloc, GFP_IMAGE | __GFP_HIGHMEM);
1622 }
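
/*
 * E.g. with nr_pages == 1000, highmem == 256 and total == 1024 page frames
 * this asks preallocate_image_pages() for 1000 * 256 / 1024 == 250 highmem
 * pages.
 */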
1623 #else  /* CONFIG_HIGHMEM */
1624 static inline unsigned long preallocate_image_highmem(unsigned long nr_pages)
1625 {
1626     return 0;
1627 }
1628 
1629 static inline unsigned long preallocate_highmem_fraction(unsigned long nr_pages, unsigned long highmem,
1630                                                          unsigned long total)
1631 {
1632     return 0;
1633 }
1634 #endif /* CONFIG_HIGHMEM */
1635 
1636 /**
1637  * free_unnecessary_pages - Release preallocated pages not needed for the image.
1638  */
1639 static unsigned long free_unnecessary_pages(void)
1640 {
1641     unsigned long save, to_free_normal, to_free_highmem, free;
1642 
1643     save = count_data_pages();
1644     if (alloc_normal >= save) {
1645         to_free_normal = alloc_normal - save;
1646         save = 0;
1647     } else {
1648         to_free_normal = 0;
1649         save -= alloc_normal;
1650     }
1651     save += count_highmem_pages();
1652     if (alloc_highmem >= save) {
1653         to_free_highmem = alloc_highmem - save;
1654     } else {
1655         to_free_highmem = 0;
1656         save -= alloc_highmem;
1657         if (to_free_normal > save) {
1658             to_free_normal -= save;
1659         } else {
1660             to_free_normal = 0;
1661         }
1662     }
1663     free = to_free_normal + to_free_highmem;
1664 
1665     memory_bm_position_reset(&copy_bm);
1666 
1667     while (to_free_normal > 0 || to_free_highmem > 0) {
1668         unsigned long pfn = memory_bm_next_pfn(&copy_bm);
1669         struct page *page = pfn_to_page(pfn);
1670 
1671         if (PageHighMem(page)) {
1672             if (!to_free_highmem) {
1673                 continue;
1674             }
1675             to_free_highmem--;
1676             alloc_highmem--;
1677         } else {
1678             if (!to_free_normal) {
1679                 continue;
1680             }
1681             to_free_normal--;
1682             alloc_normal--;
1683         }
1684         memory_bm_clear_bit(&copy_bm, pfn);
1685         swsusp_unset_page_forbidden(page);
1686         swsusp_unset_page_free(page);
1687         __free_page(page);
1688     }
1689 
1690     return free;
1691 }
1692 
1693 /**
1694  * minimum_image_size - Estimate the minimum acceptable size of an image.
1695  * @saveable: Number of saveable pages in the system.
1696  *
1697  * We want to avoid attempting to free too much memory too hard, so estimate the
1698  * minimum acceptable size of a hibernation image to use as the lower limit for
1699  * preallocating memory.
1700  *
1701  * We assume that the minimum image size should be proportional to
1702  *
1703  * [number of saveable pages] - [number of pages that can be freed in theory]
1704  *
1705  * where the second term is the sum of (1) reclaimable slab pages, (2) active
1706  * and (3) inactive anonymous pages, (4) active and (5) inactive file pages.
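 *
 * For example (hypothetical numbers), with 800,000 saveable pages and a total
 * of 500,000 pages in those five categories, the estimate is 300,000 pages;
 * whenever the freeable total is at least the number of saveable pages, the
 * estimate is 0.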
1707  */
1708 static unsigned long minimum_image_size(unsigned long saveable)
1709 {
1710     unsigned long size;
1711 
1712     size = global_node_page_state_pages(NR_SLAB_RECLAIMABLE_B) + global_node_page_state(NR_ACTIVE_ANON) +
1713            global_node_page_state(NR_INACTIVE_ANON) + global_node_page_state(NR_ACTIVE_FILE) +
1714            global_node_page_state(NR_INACTIVE_FILE);
1715 
1716     return saveable <= size ? 0 : saveable - size;
1717 }
1718 
1719 /**
1720  * hibernate_preallocate_memory - Preallocate memory for hibernation image.
1721  *
1722  * To create a hibernation image it is necessary to make a copy of every page
1723  * frame in use.  We also need a number of page frames to be free during
1724  * hibernation for allocations made while saving the image and for device
1725  * drivers, in case they need to allocate memory from their hibernation
1726  * callbacks (these two numbers are given by PAGES_FOR_IO (which is a rough
1727  * estimate) and reserved_size divided by PAGE_SIZE (which is tunable through
1728  * /sys/power/reserved_size), respectively).  To make this happen, we compute the
1729  * total number of available page frames and allocate at least
1730  *
1731  * ([page frames total] + PAGES_FOR_IO + [metadata pages]) / 2
1732  *  + 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE)
1733  *
1734  * of them, which corresponds to the maximum size of a hibernation image.
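 *
 * As a worked instance (hypothetical numbers): with 1,000,000 usable page
 * frames, PAGES_FOR_IO = 1024, 500 metadata pages and reserved_size equal to
 * 1 MiB on 4 KiB pages, this comes to (1000000 + 1024 + 500) / 2 + 2 * 256 =
 * 501274 page frames.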
1735  *
1736  * If image_size is set below the number following from the above formula,
1737  * the preallocation of memory is continued until the total number of saveable
1738  * pages in the system is below the requested image size or the minimum
1739  * acceptable image size returned by minimum_image_size(), whichever is greater.
1740  */
1741 int hibernate_preallocate_memory(void)
1742 {
1743     struct zone *zone;
1744     unsigned long saveable, size, max_size, count, highmem, pages = 0;
1745     unsigned long alloc, save_highmem, pages_highmem, avail_normal;
1746     ktime_t start, stop;
1747     int error;
1748 
1749     pr_info("Preallocating image memory\n");
1750     start = ktime_get();
1751 
1752     error = memory_bm_create(&orig_bm, GFP_IMAGE, PG_ANY);
1753     if (error) {
1754         pr_err("Cannot allocate original bitmap\n");
1755         goto err_out;
1756     }
1757 
1758     error = memory_bm_create(&copy_bm, GFP_IMAGE, PG_ANY);
1759     if (error) {
1760         pr_err("Cannot allocate copy bitmap\n");
1761         goto err_out;
1762     }
1763 
1764     alloc_normal = 0;
1765     alloc_highmem = 0;
1766 
1767     /* Count the number of saveable data pages. */
1768     save_highmem = count_highmem_pages();
1769     saveable = count_data_pages();
1770 
1771     /*
1772      * Compute the total number of page frames we can use (count) and the
1773      * number of pages needed for image metadata (size).
1774      */
1775     count = saveable;
1776     saveable += save_highmem;
1777     highmem = save_highmem;
1778     size = 0;
1779     for_each_populated_zone(zone)
1780     {
1781         size += snapshot_additional_pages(zone);
1782         if (is_highmem(zone)) {
1783             highmem += zone_page_state(zone, NR_FREE_PAGES);
1784         } else {
1785             count += zone_page_state(zone, NR_FREE_PAGES);
1786         }
1787     }
1788     avail_normal = count;
1789     count += highmem;
1790     count -= totalreserve_pages;
1791 
1792     /* Compute the maximum number of saveable pages to leave in memory. */
1793     max_size = (count - (size + PAGES_FOR_IO)) / SNAPSHOT_TWO - SNAPSHOT_TWO * DIV_ROUND_UP(reserved_size, PAGE_SIZE);
1794     /* Compute the desired number of image pages specified by image_size. */
1795     size = DIV_ROUND_UP(image_size, PAGE_SIZE);
1796     if (size > max_size) {
1797         size = max_size;
1798     }
1799     /*
1800      * If the desired number of image pages is at least as large as the
1801      * current number of saveable pages in memory, allocate page frames for
1802      * the image and we're done.
1803      */
1804     if (size >= saveable) {
1805         pages = preallocate_image_highmem(save_highmem);
1806         pages += preallocate_image_memory(saveable - pages, avail_normal);
1807         goto out;
1808     }
1809 
1810     /* Estimate the minimum size of the image. */
1811     pages = minimum_image_size(saveable);
1812     /*
1813      * To avoid excessive pressure on the normal zone, leave room in it to
1814      * accommodate an image of the minimum size (unless it's already too
1815      * small, in which case don't preallocate pages from it at all).
1816      */
1817     if (avail_normal > pages) {
1818         avail_normal -= pages;
1819     } else {
1820         avail_normal = 0;
1821     }
1822     if (size < pages) {
1823         size = min_t(unsigned long, pages, max_size);
1824     }
1825 
1826     /*
1827      * Let the memory management subsystem know that we're going to need a
1828      * large number of page frames to allocate and make it free some memory.
1829      * NOTE: If this is not done, performance will be hurt badly in some
1830      * test cases.
1831      */
1832     shrink_all_memory(saveable - size);
1833 
1834     /*
1835      * The number of saveable pages in memory was too high, so apply some
1836      * pressure to decrease it.  First, make room for the largest possible
1837      * image and fail if that doesn't work.  Next, try to decrease the size
1838      * of the image as much as indicated by 'size' using allocations from
1839      * highmem and non-highmem zones separately.
1840      */
1841     pages_highmem = preallocate_image_highmem(highmem / SNAPSHOT_TWO);
1842     alloc = count - max_size;
1843     if (alloc > pages_highmem) {
1844         alloc -= pages_highmem;
1845     } else {
1846         alloc = 0;
1847     }
1848     pages = preallocate_image_memory(alloc, avail_normal);
1849     if (pages < alloc) {
1850         /* We have exhausted non-highmem pages, try highmem. */
1851         alloc -= pages;
1852         pages += pages_highmem;
1853         pages_highmem = preallocate_image_highmem(alloc);
1854         if (pages_highmem < alloc) {
1855             pr_err("Image allocation is %lu pages short\n", alloc - pages_highmem);
1856             goto err_out;
1857         }
1858         pages += pages_highmem;
1859         /*
1860          * size is the desired number of saveable pages to leave in
1861          * memory, so try to preallocate (all memory - size) pages.
1862          */
1863         alloc = (count - pages) - size;
1864         pages += preallocate_image_highmem(alloc);
1865     } else {
1866         /*
1867          * There are approximately max_size saveable pages at this point
1868          * and we want to reduce this number down to size.
1869          */
1870         alloc = max_size - size;
1871         size = preallocate_highmem_fraction(alloc, highmem, count);
1872         pages_highmem += size;
1873         alloc -= size;
1874         size = preallocate_image_memory(alloc, avail_normal);
1875         pages_highmem += preallocate_image_highmem(alloc - size);
1876         pages += pages_highmem + size;
1877     }
1878 
1879     /*
1880      * We only need as many page frames for the image as there are saveable
1881      * pages in memory, but we have allocated more.  Release the excessive
1882      * ones now.
1883      */
1884     pages -= free_unnecessary_pages();
1885 
1886 out:
1887     stop = ktime_get();
1888     pr_info("Allocated %lu pages for snapshot\n", pages);
1889     swsusp_show_speed(start, stop, pages, "Allocated");
1890 
1891     return 0;
1892 
1893 err_out:
1894     swsusp_free();
1895     return -ENOMEM;
1896 }
1897 
1898 #ifdef CONFIG_HIGHMEM
1899 /**
1900  * count_pages_for_highmem - Count non-highmem pages needed for copying highmem.
1901  *
1902  * Compute the number of non-highmem pages that will be necessary for creating
1903  * copies of highmem pages.
1904  */
1905 static unsigned int count_pages_for_highmem(unsigned int nr_highmem)
1906 {
1907     unsigned int free_highmem = count_free_highmem_pages() + alloc_highmem;
1908     if (free_highmem >= nr_highmem) {
1909         nr_highmem = 0;
1910     } else {
1911         nr_highmem -= free_highmem;
1912     }
1913 
1914     return nr_highmem;
1915 }
1916 #else
1917 static unsigned int count_pages_for_highmem(unsigned int nr_highmem)
1918 {
1919     return 0;
1920 }
1921 #endif /* CONFIG_HIGHMEM */
1922 
1923 /**
1924  * enough_free_mem - Check if there is enough free memory for the image.
1925  */
1926 static int enough_free_mem(unsigned int nr_pages, unsigned int nr_highmem)
1927 {
1928     struct zone *zone;
1929     unsigned int free = alloc_normal;
1930 
1931     for_each_populated_zone(zone) if (!is_highmem(zone)) free += zone_page_state(zone, NR_FREE_PAGES);
1932 
1933     nr_pages += count_pages_for_highmem(nr_highmem);
1934     pr_debug("Normal pages needed: %u + %u, available pages: %u\n", nr_pages, PAGES_FOR_IO, free);
1935 
1936     return free > nr_pages + PAGES_FOR_IO;
1937 }
1938 
1939 #ifdef CONFIG_HIGHMEM
1940 /**
1941  * get_highmem_buffer - Allocate a buffer for highmem pages.
1942  *
1943  * If there are some highmem pages in the hibernation image, we may need a
1944  * buffer to copy them and/or load their data.
1945  */
1946 static inline int get_highmem_buffer(int safe_needed)
1947 {
1948     buffer = get_image_page(GFP_ATOMIC, safe_needed);
1949     return buffer ? 0 : -ENOMEM;
1950 }
1951 
1952 /**
1953  * alloc_highmem_pages - Allocate some highmem pages for the image.
1954  *
1955  * Try to allocate as many pages as needed, but if the number of free highmem
1956  * pages is less than that, allocate them all.
1957  */
1958 static inline unsigned int alloc_highmem_pages(struct memory_bitmap *bm, unsigned int nr_highmem)
1959 {
1960     unsigned int to_alloc = count_free_highmem_pages();
1961     if (to_alloc > nr_highmem) {
1962         to_alloc = nr_highmem;
1963     }
1964 
1965     nr_highmem -= to_alloc;
1966     while (to_alloc-- > 0) {
1967         struct page *page;
1968 
1969         page = alloc_image_page(__GFP_HIGHMEM | __GFP_KSWAPD_RECLAIM);
1970         memory_bm_set_bit(bm, page_to_pfn(page));
1971     }
1972     return nr_highmem;
1973 }
1974 #else
1975 static inline int get_highmem_buffer(int safe_needed)
1976 {
1977     return 0;
1978 }
1979 
1980 static inline unsigned int alloc_highmem_pages(struct memory_bitmap *bm, unsigned int n)
1981 {
1982     return 0;
1983 }
1984 #endif /* CONFIG_HIGHMEM */
1985 
1986 /**
1987  * swsusp_alloc - Allocate memory for hibernation image.
1988  *
1989  * We first try to allocate as many highmem pages as there are
1990  * saveable highmem pages in the system.  If that fails, we allocate
1991  * non-highmem pages for the copies of the remaining highmem ones.
1992  *
1993  * In this approach it is likely that the copies of highmem pages will
1994  * also be located in the high memory, because of the way in which
1995  * copy_data_pages() works.
1996  */
1997 static int swsusp_alloc(struct memory_bitmap *copy_bm_ex, unsigned int nr_pages, unsigned int nr_highmem)
1998 {
1999     if (nr_highmem > 0) {
2000         if (get_highmem_buffer(PG_ANY)) {
2001             goto err_out;
2002         }
2003         if (nr_highmem > alloc_highmem) {
2004             nr_highmem -= alloc_highmem;
2005             nr_pages += alloc_highmem_pages(copy_bm_ex, nr_highmem);
2006         }
2007     }
2008     if (nr_pages > alloc_normal) {
2009         nr_pages -= alloc_normal;
2010         while (nr_pages-- > 0) {
2011             struct page *page;
2012 
2013             page = alloc_image_page(GFP_ATOMIC);
2014             if (!page) {
2015                 goto err_out;
2016             }
2017             memory_bm_set_bit(copy_bm_ex, page_to_pfn(page));
2018         }
2019     }
2020 
2021     return 0;
2022 
2023 err_out:
2024     swsusp_free();
2025     return -ENOMEM;
2026 }
2027 
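/**
 * swsusp_save - Create the hibernation image.
 *
 * Count the saveable pages, check that enough memory is free, allocate page
 * frames for the copies and then copy the data pages atomically.
 */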
2028 asmlinkage __visible int swsusp_save(void)
2029 {
2030     unsigned int nr_pages, nr_highmem;
2031 
2032     pr_info("Creating image:\n");
2033 
2034     drain_local_pages(NULL);
2035     nr_pages = count_data_pages();
2036     nr_highmem = count_highmem_pages();
2037     pr_info("Need to copy %u pages\n", nr_pages + nr_highmem);
2038 
2039     if (!enough_free_mem(nr_pages, nr_highmem)) {
2040         pr_err("Not enough free memory\n");
2041         return -ENOMEM;
2042     }
2043 
2044     if (swsusp_alloc(&copy_bm, nr_pages, nr_highmem)) {
2045         pr_err("Memory allocation failed\n");
2046         return -ENOMEM;
2047     }
2048 
2049     /*
2050      * While allocating the suspend pagedir, new cold pages may appear.
2051      * Kill them.
2052      */
2053     drain_local_pages(NULL);
2054     copy_data_pages(&copy_bm, &orig_bm);
2055 
2056     /*
2057      * End of critical section. From now on, we can write to memory,
2058      * but we should not touch disk. In particular, we must _not_
2059      * touch swap space! Except we must write out our image of course.
2060      */
2061 
2062     nr_pages += nr_highmem;
2063     nr_copy_pages = nr_pages;
2064     nr_meta_pages = DIV_ROUND_UP(nr_pages * sizeof(long), PAGE_SIZE);
2065 
2066     pr_info("Image created (%d pages copied)\n", nr_pages);
2067 
2068     return 0;
2069 }
2070 
2071 #ifndef CONFIG_ARCH_HIBERNATION_HEADER
2072 static int init_header_complete(struct swsusp_info *info)
2073 {
2074     memcpy(&info->uts, init_utsname(), sizeof(struct new_utsname));
2075     info->version_code = LINUX_VERSION_CODE;
2076     return 0;
2077 }
2078 
2079 static const char *check_image_kernel(struct swsusp_info *info)
2080 {
2081     if (info->version_code != LINUX_VERSION_CODE) {
2082         return "kernel version";
2083     }
2084     if (strcmp(info->uts.sysname, init_utsname()->sysname)) {
2085         return "system type";
2086     }
2087     if (strcmp(info->uts.release, init_utsname()->release)) {
2088         return "kernel release";
2089     }
2090     if (strcmp(info->uts.version, init_utsname()->version)) {
2091         return "version";
2092     }
2093     if (strcmp(info->uts.machine, init_utsname()->machine)) {
2094         return "machine";
2095     }
2096     return NULL;
2097 }
2098 #endif /* CONFIG_ARCH_HIBERNATION_HEADER */
2099 
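/**
 * snapshot_get_image_size - Number of pages needed to store the image.
 *
 * This covers the data pages, the metadata (PFN list) pages and one page for
 * the image header.
 */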
2100 unsigned long snapshot_get_image_size(void)
2101 {
2102     return nr_copy_pages + nr_meta_pages + 1;
2103 }
2104 
2105 static int init_header(struct swsusp_info *info)
2106 {
2107     memset(info, 0, sizeof(struct swsusp_info));
2108     info->num_physpages = get_num_physpages();
2109     info->image_pages = nr_copy_pages;
2110     info->pages = snapshot_get_image_size();
2111     info->size = info->pages;
2112     info->size <<= PAGE_SHIFT;
2113     return init_header_complete(info);
2114 }
2115 
2116 /**
2117  * pack_pfns - Prepare PFNs for saving.
2118  * @bm: Memory bitmap.
2119  * @buf: Memory buffer to store the PFNs in.
2120  *
2121  * PFNs corresponding to set bits in @bm are stored in the area of memory
2122  * pointed to by @buf (1 page at a time).
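 *
 * On a configuration with 4 KiB pages and 8-byte longs, for instance, each
 * such page carries up to 512 PFNs, which matches the
 * nr_meta_pages = DIV_ROUND_UP(nr_pages * sizeof(long), PAGE_SIZE)
 * computation in swsusp_save().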
2123  */
2124 static inline void pack_pfns(unsigned long *buf, struct memory_bitmap *bm)
2125 {
2126     int j;
2127 
2128     for (j = 0; j < PAGE_SIZE / sizeof(long); j++) {
2129         buf[j] = memory_bm_next_pfn(bm);
2130         if (unlikely(buf[j] == BM_END_OF_MAP)) {
2131             break;
2132         }
2133     }
2134 }
2135 
2136 /**
2137  * snapshot_read_next - Get the address to read the next image page from.
2138  * @handle: Snapshot handle to be used for the reading.
2139  *
2140  * On the first call, @handle should point to a zeroed snapshot_handle
2141  * structure.  The structure is then populated and a pointer to it should be
2142  * passed to this function on every subsequent call.
2143  *
2144  * On success, the function returns a positive number.  Then, the caller
2145  * is allowed to read up to the returned number of bytes from the memory
2146  * location computed by the data_of() macro.
2147  *
2148  * The function returns 0 to indicate the end of the data stream condition,
2149  * and negative numbers are returned on errors.  If that happens, the structure
2150  * pointed to by @handle is not updated and should not be used any more.
2151  */
2152 int snapshot_read_next(struct snapshot_handle *handle)
2153 {
2154     if (handle->cur > nr_meta_pages + nr_copy_pages) {
2155         return 0;
2156     }
2157 
2158     if (!buffer) {
2159         /* This makes the buffer be freed by swsusp_free() */
2160         buffer = get_image_page(GFP_ATOMIC, PG_ANY);
2161         if (!buffer) {
2162             return -ENOMEM;
2163         }
2164     }
2165     if (!handle->cur) {
2166         int error;
2167 
2168         error = init_header((struct swsusp_info *)buffer);
2169         if (error) {
2170             return error;
2171         }
2172         handle->buffer = buffer;
2173         memory_bm_position_reset(&orig_bm);
2174         memory_bm_position_reset(&copy_bm);
2175     } else if (handle->cur <= nr_meta_pages) {
2176         clear_page(buffer);
2177         pack_pfns(buffer, &orig_bm);
2178     } else {
2179         struct page *page;
2180 
2181         page = pfn_to_page(memory_bm_next_pfn(&copy_bm));
2182         if (PageHighMem(page)) {
2183             /*
2184              * Highmem pages are copied to the buffer,
2185              * because we can't return with a kmapped
2186              * highmem page (we may not be called again).
2187              */
2188             void *kaddr;
2189 
2190             kaddr = kmap_atomic(page);
2191             copy_page(buffer, kaddr);
2192             kunmap_atomic(kaddr);
2193             handle->buffer = buffer;
2194         } else {
2195             handle->buffer = page_address(page);
2196         }
2197     }
2198     handle->cur++;
2199     return PAGE_SIZE;
2200 }
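
/*
 * A minimal sketch of the consumer loop around snapshot_read_next() above,
 * used after swsusp_save() has created the image; write_page() stands for a
 * hypothetical sink for the image data and is not part of this file:
 *
 *     struct snapshot_handle handle;
 *     int ret;
 *
 *     memset(&handle, 0, sizeof(handle));
 *     while ((ret = snapshot_read_next(&handle)) > 0) {
 *         ret = write_page(data_of(handle), ret);
 *         if (ret)
 *             break;
 *     }
 *
 * A return value of 0 marks the end of the image data, a negative value an
 * error.
 */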
2201 
2202 static void duplicate_memory_bitmap(struct memory_bitmap *dst, struct memory_bitmap *src)
2203 {
2204     unsigned long pfn;
2205 
2206     memory_bm_position_reset(src);
2207     pfn = memory_bm_next_pfn(src);
2208     while (pfn != BM_END_OF_MAP) {
2209         memory_bm_set_bit(dst, pfn);
2210         pfn = memory_bm_next_pfn(src);
2211     }
2212 }
2213 
2214 /**
2215  * mark_unsafe_pages - Mark pages that were used before hibernation.
2216  *
2217  * Mark the pages that cannot be used for storing the image during restoration,
2218  * because they conflict with the pages that had been used before hibernation.
2219  */
2220 static void mark_unsafe_pages(struct memory_bitmap *bm)
2221 {
2222     unsigned long pfn;
2223 
2224     /* Clear the "free"/"unsafe" bit for all PFNs */
2225     memory_bm_position_reset(free_pages_map);
2226     pfn = memory_bm_next_pfn(free_pages_map);
2227     while (pfn != BM_END_OF_MAP) {
2228         memory_bm_clear_current(free_pages_map);
2229         pfn = memory_bm_next_pfn(free_pages_map);
2230     }
2231 
2232     /* Mark pages that correspond to the "original" PFNs as "unsafe" */
2233     duplicate_memory_bitmap(free_pages_map, bm);
2234 
2235     allocated_unsafe_pages = 0;
2236 }
2237 
2238 static int check_header(struct swsusp_info *info)
2239 {
2240     const char *reason;
2241 
2242     reason = check_image_kernel(info);
2243     if (!reason && info->num_physpages != get_num_physpages()) {
2244         reason = "memory size";
2245     }
2246     if (reason) {
2247         pr_err("Image mismatch: %s\n", reason);
2248         return -EPERM;
2249     }
2250     return 0;
2251 }
2252 
2253 /**
2254  * load_header - Check the image header and copy the data from it.
2255  */
2256 static int load_header(struct swsusp_info *info)
2257 {
2258     int error;
2259 
2260     restore_pblist = NULL;
2261     error = check_header(info);
2262     if (!error) {
2263         nr_copy_pages = info->image_pages;
2264         nr_meta_pages = info->pages - info->image_pages - 1;
2265     }
2266     return error;
2267 }
2268 
2269 /**
2270  * unpack_orig_pfns - Set bits corresponding to given PFNs in a memory bitmap.
2271  * @bm: Memory bitmap.
2272  * @buf: Area of memory containing the PFNs.
2273  *
2274  * For each element of the array pointed to by @buf (1 page at a time), set the
2275  * corresponding bit in @bm.
2276  */
2277 static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm)
2278 {
2279     int j;
2280 
2281     for (j = 0; j < PAGE_SIZE / sizeof(long); j++) {
2282         if (unlikely(buf[j] == BM_END_OF_MAP)) {
2283             break;
2284         }
2285 
2286         if (pfn_valid(buf[j]) && memory_bm_pfn_present(bm, buf[j])) {
2287             memory_bm_set_bit(bm, buf[j]);
2288         } else {
2289             return -EFAULT;
2290         }
2291     }
2292 
2293     return 0;
2294 }
2295 
2296 #ifdef CONFIG_HIGHMEM
2297 /*
2298  * struct highmem_pbe is used for creating the list of highmem pages that
2299  * should be restored atomically during the resume from disk, because the page
2300  * frames they have occupied before the suspend are in use.
2301  */
2302 struct highmem_pbe {
2303     struct page *copy_page; /* data is here now */
2304     struct page *orig_page; /* data was here before the suspend */
2305     struct highmem_pbe *next;
2306 };
2307 
2308 /*
2309  * List of highmem PBEs needed for restoring the highmem pages that were
2310  * allocated before the suspend and included in the suspend image, but have
2311  * also been allocated by the "resume" kernel, so their contents cannot be
2312  * written directly to their "original" page frames.
2313  */
2314 static struct highmem_pbe *highmem_pblist;
2315 
2316 /**
2317  * count_highmem_image_pages - Compute the number of highmem pages in the image.
2318  * @bm: Memory bitmap.
2319  *
2320  * The bits in @bm that correspond to image pages are assumed to be set.
2321  */
2322 static unsigned int count_highmem_image_pages(struct memory_bitmap *bm)
2323 {
2324     unsigned long pfn;
2325     unsigned int cnt = 0;
2326 
2327     memory_bm_position_reset(bm);
2328     pfn = memory_bm_next_pfn(bm);
2329     while (pfn != BM_END_OF_MAP) {
2330         if (PageHighMem(pfn_to_page(pfn))) {
2331             cnt++;
2332         }
2333 
2334         pfn = memory_bm_next_pfn(bm);
2335     }
2336     return cnt;
2337 }
2338 
2339 static unsigned int safe_highmem_pages;
2340 
2341 static struct memory_bitmap *safe_highmem_bm;
2342 
2343 /**
2344  * prepare_highmem_image - Allocate memory for loading highmem data from image.
2345  * @bm: Pointer to an uninitialized memory bitmap structure.
2346  * @nr_highmem_p: Pointer to the number of highmem image pages.
2347  *
2348  * Try to allocate as many highmem pages as there are highmem image pages
2349  * (@nr_highmem_p points to the variable containing the number of highmem image
2350  * pages).  The pages that are "safe" (i.e. will not be overwritten when the
2351  * hibernation image is restored entirely) have the corresponding bits set in
2352  * @bm (it must be uninitialized).
2353  *
2354  * NOTE: This function should not be called if there are no highmem image pages.
2355  */
2356 static int prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p)
2357 {
2358     unsigned int to_alloc;
2359 
2360     if (memory_bm_create(bm, GFP_ATOMIC, PG_SAFE)) {
2361         return -ENOMEM;
2362     }
2363 
2364     if (get_highmem_buffer(PG_SAFE)) {
2365         return -ENOMEM;
2366     }
2367 
2368     to_alloc = count_free_highmem_pages();
2369     if (to_alloc > *nr_highmem_p) {
2370         to_alloc = *nr_highmem_p;
2371     } else {
2372         *nr_highmem_p = to_alloc;
2373     }
2374 
2375     safe_highmem_pages = 0;
2376     while (to_alloc-- > 0) {
2377         struct page *page;
2378 
2379         page = alloc_page(__GFP_HIGHMEM);
2380         if (!swsusp_page_is_free(page)) {
2381             /* The page is "safe", set its bit in the bitmap */
2382             memory_bm_set_bit(bm, page_to_pfn(page));
2383             safe_highmem_pages++;
2384         }
2385         /* Mark the page as allocated */
2386         swsusp_set_page_forbidden(page);
2387         swsusp_set_page_free(page);
2388     }
2389     memory_bm_position_reset(bm);
2390     safe_highmem_bm = bm;
2391     return 0;
2392 }
2393 
2394 static struct page *last_highmem_page;
2395 
2396 /**
2397  * get_highmem_page_buffer - Prepare a buffer to store a highmem image page.
2398  *
2399  * For a given highmem image page get a buffer that snapshot_write_next() should
2400  * return to its caller to write to.
2401  *
2402  * If the page is to be saved to its "original" page frame or a copy of
2403  * the page is to be made in the highmem, @buffer is returned.  Otherwise,
2404  * the copy of the page is to be made in normal memory, so the address of
2405  * the copy is returned.
2406  *
2407  * If @buffer is returned, the caller of snapshot_write_next() will write
2408  * the page's contents to @buffer, so they will have to be copied to the
2409  * right location on the next call to suspend_write_next() and it is done
2410  * with the help of copy_last_highmem_page().  For this purpose, if
2411  * @buffer is returned, @last_highmem_page is set to the page to which
2412  * the data will have to be copied from @buffer.
2413  */
2414 static void *get_highmem_page_buffer(struct page *page, struct chain_allocator *ca)
2415 {
2416     struct highmem_pbe *pbe;
2417     void *kaddr;
2418 
2419     if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page)) {
2420         /*
2421          * We have allocated the "original" page frame and we can
2422          * use it directly to store the loaded page.
2423          */
2424         last_highmem_page = page;
2425         return buffer;
2426     }
2427     /*
2428      * The "original" page frame has not been allocated and we have to
2429      * use a "safe" page frame to store the loaded page.
2430      */
2431     pbe = chain_alloc(ca, sizeof(struct highmem_pbe));
2432     if (!pbe) {
2433         swsusp_free();
2434         return ERR_PTR(-ENOMEM);
2435     }
2436     pbe->orig_page = page;
2437     if (safe_highmem_pages > 0) {
2438         struct page *tmp;
2439 
2440         /* Copy of the page will be stored in high memory */
2441         kaddr = buffer;
2442         tmp = pfn_to_page(memory_bm_next_pfn(safe_highmem_bm));
2443         safe_highmem_pages--;
2444         last_highmem_page = tmp;
2445         pbe->copy_page = tmp;
2446     } else {
2447         /* Copy of the page will be stored in normal memory */
2448         kaddr = safe_pages_list;
2449         safe_pages_list = safe_pages_list->next;
2450         pbe->copy_page = virt_to_page(kaddr);
2451     }
2452     pbe->next = highmem_pblist;
2453     highmem_pblist = pbe;
2454     return kaddr;
2455 }
2456 
2457 /**
2458  * copy_last_highmem_page - Copy the most recent highmem image page.
2459  *
2460  * Copy the contents of a highmem image page from @buffer, where the caller of
2461  * snapshot_write_next() has stored them, to the right location represented by
2462  * @last_highmem_page.
2463  */
2464 static void copy_last_highmem_page(void)
2465 {
2466     if (last_highmem_page) {
2467         void *dst;
2468 
2469         dst = kmap_atomic(last_highmem_page);
2470         copy_page(dst, buffer);
2471         kunmap_atomic(dst);
2472         last_highmem_page = NULL;
2473     }
2474 }
2475 
2476 static inline int last_highmem_page_copied(void)
2477 {
2478     return !last_highmem_page;
2479 }
2480 
2481 static inline void free_highmem_data(void)
2482 {
2483     if (safe_highmem_bm) {
2484         memory_bm_free(safe_highmem_bm, PG_UNSAFE_CLEAR);
2485     }
2486 
2487     if (buffer) {
2488         free_image_page(buffer, PG_UNSAFE_CLEAR);
2489     }
2490 }
2491 #else
2492 static unsigned int count_highmem_image_pages(struct memory_bitmap *bm)
2493 {
2494     return 0;
2495 }
2496 
2497 static inline int prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p)
2498 {
2499     return 0;
2500 }
2501 
2502 static inline void *get_highmem_page_buffer(struct page *page, struct chain_allocator *ca)
2503 {
2504     return ERR_PTR(-EINVAL);
2505 }
2506 
2507 static inline void copy_last_highmem_page(void)
2508 {
2509 }
2510 static inline int last_highmem_page_copied(void)
2511 {
2512     return 1;
2513 }
2514 static inline void free_highmem_data(void)
2515 {
2516 }
2517 #endif /* CONFIG_HIGHMEM */
2518 
2519 #define PBES_PER_LINKED_PAGE (LINKED_PAGE_DATA_SIZE / sizeof(struct pbe))
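/*
 * As a rough illustration: assuming 4 KiB pages, 8-byte pointers and a
 * three-pointer struct pbe (assumptions, not derived from this file), this
 * works out to roughly 170 PBEs per linked page.
 */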
2520 
2521 /**
2522  * prepare_image - Make room for loading hibernation image.
2523  * @new_bm: Uninitialized memory bitmap structure.
2524  * @bm: Memory bitmap with unsafe pages marked.
2525  *
2526  * Use @bm to mark the pages that will be overwritten in the process of
2527  * restoring the system memory state from the suspend image ("unsafe" pages)
2528  * and allocate memory for the image.
2529  *
2530  * The idea is to allocate a new memory bitmap first and then allocate
2531  * as many pages as needed for image data, but without specifying what those
2532  * pages will be used for just yet.  Instead, we mark them all as allocated and
2533  * create a list of "safe" pages to be used later.  On systems with high
2534  * memory a list of "safe" highmem pages is created too.
2535  */
2536 static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
2537 {
2538     unsigned int nr_pages, nr_highmem;
2539     struct linked_page *lp;
2540     int error;
2541 
2542     /* If there is no highmem, the buffer will not be necessary */
2543     free_image_page(buffer, PG_UNSAFE_CLEAR);
2544     buffer = NULL;
2545 
2546     nr_highmem = count_highmem_image_pages(bm);
2547     mark_unsafe_pages(bm);
2548 
2549     error = memory_bm_create(new_bm, GFP_ATOMIC, PG_SAFE);
2550     if (error) {
2551         goto Free;
2552     }
2553 
2554     duplicate_memory_bitmap(new_bm, bm);
2555     memory_bm_free(bm, PG_UNSAFE_KEEP);
2556     if (nr_highmem > 0) {
2557         error = prepare_highmem_image(bm, &nr_highmem);
2558         if (error) {
2559             goto Free;
2560         }
2561     }
2562     /*
2563      * Reserve some safe pages for potential later use.
2564      *
2565      * NOTE: This way we make sure there will be enough safe pages for the
2566      * chain_alloc() in get_buffer().  It is a bit wasteful, but
2567      * nr_copy_pages cannot be greater than 50% of the memory anyway.
2568      *
2569      * Also, nr_copy_pages cannot be less than allocated_unsafe_pages.
2570      */
2571     nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages;
2572     nr_pages = DIV_ROUND_UP(nr_pages, PBES_PER_LINKED_PAGE);
2573     while (nr_pages > 0) {
2574         lp = get_image_page(GFP_ATOMIC, PG_SAFE);
2575         if (!lp) {
2576             error = -ENOMEM;
2577             goto Free;
2578         }
2579         lp->next = safe_pages_list;
2580         safe_pages_list = lp;
2581         nr_pages--;
2582     }
2583     /* Preallocate memory for the image */
2584     nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages;
2585     while (nr_pages > 0) {
2586         lp = (struct linked_page *)get_zeroed_page(GFP_ATOMIC);
2587         if (!lp) {
2588             error = -ENOMEM;
2589             goto Free;
2590         }
2591         if (!swsusp_page_is_free(virt_to_page(lp))) {
2592             /* The page is "safe", add it to the list */
2593             lp->next = safe_pages_list;
2594             safe_pages_list = lp;
2595         }
2596         /* Mark the page as allocated */
2597         swsusp_set_page_forbidden(virt_to_page(lp));
2598         swsusp_set_page_free(virt_to_page(lp));
2599         nr_pages--;
2600     }
2601     return 0;
2602 
2603 Free:
2604     swsusp_free();
2605     return error;
2606 }
2607 
2608 /**
2609  * get_buffer - Get the address to store the next image data page.
2610  *
2611  * Get the address that snapshot_write_next() should return to its caller to
2612  * write to.
2613  */
2614 static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca)
2615 {
2616     struct pbe *pbe;
2617     struct page *page;
2618     unsigned long pfn = memory_bm_next_pfn(bm);
2619     if (pfn == BM_END_OF_MAP) {
2620         return ERR_PTR(-EFAULT);
2621     }
2622 
2623     page = pfn_to_page(pfn);
2624     if (PageHighMem(page)) {
2625         return get_highmem_page_buffer(page, ca);
2626     }
2627 
2628     if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page)) {
2629         /*
2630          * We have allocated the "original" page frame and we can
2631          * use it directly to store the loaded page.
2632          */
2633         return page_address(page);
2634     }
2635 
2636     /*
2637      * The "original" page frame has not been allocated and we have to
2638      * use a "safe" page frame to store the loaded page.
2639      */
2640     pbe = chain_alloc(ca, sizeof(struct pbe));
2641     if (!pbe) {
2642         swsusp_free();
2643         return ERR_PTR(-ENOMEM);
2644     }
2645     pbe->orig_address = page_address(page);
2646     pbe->address = safe_pages_list;
2647     safe_pages_list = safe_pages_list->next;
2648     pbe->next = restore_pblist;
2649     restore_pblist = pbe;
2650     return pbe->address;
2651 }
2652 
2653 /**
2654  * snapshot_write_next - Get the address to store the next image page.
2655  * @handle: Snapshot handle structure to guide the writing.
2656  *
2657  * On the first call, @handle should point to a zeroed snapshot_handle
2658  * structure.  The structure is then populated and a pointer to it should be
2659  * passed to this function on every subsequent call.
2660  *
2661  * On success, the function returns a positive number.  Then, the caller
2662  * is allowed to write up to the returned number of bytes to the memory
2663  * location computed by the data_of() macro.
2664  *
2665  * The function returns 0 to indicate the "end of file" condition.  Negative
2666  * numbers are returned on errors, in which cases the structure pointed to by
2667  * @handle is not updated and should not be used any more.
2668  */
2669 int snapshot_write_next(struct snapshot_handle *handle)
2670 {
2671     static struct chain_allocator ca;
2672     int error = 0;
2673 
2674     /* Check if we have already loaded the entire image */
2675     if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages) {
2676         return 0;
2677     }
2678 
2679     handle->sync_read = 1;
2680 
2681     if (!handle->cur) {
2682         if (!buffer) {
2683             /* This makes the buffer be freed by swsusp_free() */
2684             buffer = get_image_page(GFP_ATOMIC, PG_ANY);
2685         }
2686 
2687         if (!buffer) {
2688             return -ENOMEM;
2689         }
2690 
2691         handle->buffer = buffer;
2692     } else if (handle->cur == 1) {
2693         error = load_header(buffer);
2694         if (error) {
2695             return error;
2696         }
2697 
2698         safe_pages_list = NULL;
2699 
2700         error = memory_bm_create(&copy_bm, GFP_ATOMIC, PG_ANY);
2701         if (error) {
2702             return error;
2703         }
2704 
2705         hibernate_restore_protection_begin();
2706     } else if (handle->cur <= nr_meta_pages + 1) {
2707         error = unpack_orig_pfns(buffer, &copy_bm);
2708         if (error) {
2709             return error;
2710         }
2711 
2712         if (handle->cur == nr_meta_pages + 1) {
2713             error = prepare_image(&orig_bm, &copy_bm);
2714             if (error) {
2715                 return error;
2716             }
2717 
2718             chain_init(&ca, GFP_ATOMIC, PG_SAFE);
2719             memory_bm_position_reset(&orig_bm);
2720             restore_pblist = NULL;
2721             handle->buffer = get_buffer(&orig_bm, &ca);
2722             handle->sync_read = 0;
2723             if (IS_ERR(handle->buffer)) {
2724                 return PTR_ERR(handle->buffer);
2725             }
2726         }
2727     } else {
2728         copy_last_highmem_page();
2729         hibernate_restore_protect_page(handle->buffer);
2730         handle->buffer = get_buffer(&orig_bm, &ca);
2731         if (IS_ERR(handle->buffer)) {
2732             return PTR_ERR(handle->buffer);
2733         }
2734         if (handle->buffer != buffer) {
2735             handle->sync_read = 0;
2736         }
2737     }
2738     handle->cur++;
2739     return PAGE_SIZE;
2740 }
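
/*
 * A minimal sketch of the load-side loop around snapshot_write_next() above;
 * read_page() stands for a hypothetical routine that fills the returned
 * buffer with the next chunk of image data and is not part of this file:
 *
 *     struct snapshot_handle handle;
 *     int ret;
 *
 *     memset(&handle, 0, sizeof(handle));
 *     while ((ret = snapshot_write_next(&handle)) > 0) {
 *         ret = read_page(data_of(handle), ret);
 *         if (ret)
 *             break;
 *     }
 *     snapshot_write_finalize(&handle);
 *     if (!ret && !snapshot_image_loaded(&handle))
 *         ret = -ENODATA;
 */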
2741 
2742 /**
2743  * snapshot_write_finalize - Complete the loading of a hibernation image.
2744  *
2745  * Must be called after the last call to snapshot_write_next() in case the last
2746  * page in the image happens to be a highmem page and its contents should be
2747  * stored in highmem.  Additionally, it recycles bitmap memory that's not
2748  * necessary any more.
2749  */
2750 void snapshot_write_finalize(struct snapshot_handle *handle)
2751 {
2752     copy_last_highmem_page();
2753     hibernate_restore_protect_page(handle->buffer);
2754     /* Do that only if we have loaded the image entirely */
2755     if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages) {
2756         memory_bm_recycle(&orig_bm);
2757         free_highmem_data();
2758     }
2759 }
2760 
2761 int snapshot_image_loaded(struct snapshot_handle *handle)
2762 {
2763     return !(!nr_copy_pages || !last_highmem_page_copied() || handle->cur <= nr_meta_pages + nr_copy_pages);
2764 }
2765 
2766 #ifdef CONFIG_HIGHMEM
2767 /* Assumes that @buf is ready and points to a "safe" page */
2768 static inline void swap_two_pages_data(struct page *p1, struct page *p2, void *buf)
2769 {
2770     void *kaddr1, *kaddr2;
2771 
2772     kaddr1 = kmap_atomic(p1);
2773     kaddr2 = kmap_atomic(p2);
2774     copy_page(buf, kaddr1);
2775     copy_page(kaddr1, kaddr2);
2776     copy_page(kaddr2, buf);
2777     kunmap_atomic(kaddr2);
2778     kunmap_atomic(kaddr1);
2779 }
2780 
2781 /**
2782  * restore_highmem - Put highmem image pages into their original locations.
2783  *
2784  * For each highmem page that was in use before hibernation and is included in
2785  * the image, and also has been allocated by the "restore" kernel, swap its
2786  * current contents with the previous (i.e. "before hibernation") ones.
2787  *
2788  * If the restore eventually fails, we can call this function once again and
2789  * restore the highmem state as seen by the restore kernel.
2790  */
2791 int restore_highmem(void)
2792 {
2793     struct highmem_pbe *pbe = highmem_pblist;
2794     void *buf;
2795 
2796     if (!pbe) {
2797         return 0;
2798     }
2799 
2800     buf = get_image_page(GFP_ATOMIC, PG_SAFE);
2801     if (!buf) {
2802         return -ENOMEM;
2803     }
2804 
2805     while (pbe) {
2806         swap_two_pages_data(pbe->copy_page, pbe->orig_page, buf);
2807         pbe = pbe->next;
2808     }
2809     free_image_page(buf, PG_UNSAFE_CLEAR);
2810     return 0;
2811 }
2812 #endif /* CONFIG_HIGHMEM */
2813