// SPDX-License-Identifier: GPL-2.0
/*
 * Page Size Migration
 *
 * This file contains the core logic of mitigations to ensure
 * app compatibility during the transition from 4kB to 16kB
 * page size in Android.
 *
 * Copyright (c) 2024, Google LLC.
 * Author: Kalesh Singh <kaleshsingh@google.com>
 */

#include <linux/pgsize_migration.h>

#include <linux/init.h>
#include <linux/jump_label.h>
#include <linux/kobject.h>
#include <linux/kstrtox.h>
#include <linux/sched/task_stack.h>
#include <linux/slab.h>
#include <linux/sysfs.h>

#ifdef CONFIG_64BIT
#if PAGE_SIZE == SZ_4K
DEFINE_STATIC_KEY_TRUE(pgsize_migration_enabled);

#define is_pgsize_migration_enabled()	(static_branch_likely(&pgsize_migration_enabled))
#else /* PAGE_SIZE != SZ_4K */
DEFINE_STATIC_KEY_FALSE(pgsize_migration_enabled);

#define is_pgsize_migration_enabled()	(static_branch_unlikely(&pgsize_migration_enabled))
#endif /* PAGE_SIZE == SZ_4K */

static ssize_t show_pgsize_migration_enabled(struct kobject *kobj,
					     struct kobj_attribute *attr,
					     char *buf)
{
	if (is_pgsize_migration_enabled())
		return sprintf(buf, "%d\n", 1);
	else
		return sprintf(buf, "%d\n", 0);
}

static ssize_t store_pgsize_migration_enabled(struct kobject *kobj,
					      struct kobj_attribute *attr,
					      const char *buf, size_t n)
{
	unsigned long val;

	/* Migration is only applicable to 4kB kernels */
	if (PAGE_SIZE != SZ_4K)
		return n;

	if (kstrtoul(buf, 10, &val))
		return -EINVAL;

	if (val > 1)
		return -EINVAL;

	if (val == 1)
		static_branch_enable(&pgsize_migration_enabled);
	else if (val == 0)
		static_branch_disable(&pgsize_migration_enabled);

	return n;
}

static struct kobj_attribute pgsize_migration_enabled_attr = __ATTR(
	enabled,
	0644,
	show_pgsize_migration_enabled,
	store_pgsize_migration_enabled
);

static struct attribute *pgsize_migration_attrs[] = {
	&pgsize_migration_enabled_attr.attr,
	NULL
};

static struct attribute_group pgsize_migration_attr_group = {
	.name = "pgsize_migration",
	.attrs = pgsize_migration_attrs,
};

/**
 * What:          /sys/kernel/mm/pgsize_migration/enabled
 * Date:          April 2024
 * KernelVersion: v5.4+ (GKI kernels)
 * Contact:       Kalesh Singh <kaleshsingh@google.com>
 * Description:   /sys/kernel/mm/pgsize_migration/enabled
 *                allows userspace to turn on or off the page size
 *                migration mitigations necessary for app compatibility
 *                during Android's transition from 4kB to 16kB page size.
 *                Such mitigations include preserving /proc/<pid>/[s]maps
 *                output as if there were no segment extension by the
 *                dynamic loader, and preventing fault-around in the padding
 *                sections of ELF LOAD segment mappings.
 * Users:         Bionic's dynamic linker
 */
static int __init init_pgsize_migration(void)
{
	if (sysfs_create_group(mm_kobj, &pgsize_migration_attr_group))
		pr_err("pgsize_migration: failed to create sysfs group\n");

	return 0;
}
late_initcall(init_pgsize_migration);
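
/*
 * Illustrative usage sketch (not part of the kernel source): userspace is
 * expected to toggle the mitigations through the sysfs knob created above,
 * roughly as follows on a 4kB-page kernel, where the static key defaults to
 * enabled:
 *
 *   # cat /sys/kernel/mm/pgsize_migration/enabled
 *   1
 *   # echo 0 > /sys/kernel/mm/pgsize_migration/enabled     (disable mitigations)
 *   # echo 1 > /sys/kernel/mm/pgsize_migration/enabled     (re-enable)
 */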

#if PAGE_SIZE == SZ_4K
void vma_set_pad_pages(struct vm_area_struct *vma,
		       unsigned long nr_pages)
{
	if (!is_pgsize_migration_enabled())
		return;

	vma->vm_flags &= ~VM_PAD_MASK;
	vma->vm_flags |= (nr_pages << VM_PAD_SHIFT);
}

unsigned long vma_pad_pages(struct vm_area_struct *vma)
{
	if (!is_pgsize_migration_enabled())
		return 0;

	return vma->vm_flags >> VM_PAD_SHIFT;
}
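
/*
 * Worked example (illustrative): the pad-page count lives in the VM_PAD_MASK
 * bits of vm_flags, starting at bit position VM_PAD_SHIFT. Storing
 * nr_pages == 2 via vma_set_pad_pages() clears the old bits and ORs in
 * (2 << VM_PAD_SHIFT); vma_pad_pages() recovers 2 by shifting back down.
 * As noted in madvise_vma_pad_pages() below, only 4 bits are needed for
 * ELFs aligned up to 64kB.
 */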

static __always_inline bool str_has_suffix(const char *str, const char *suffix)
{
	size_t str_len = strlen(str);
	size_t suffix_len = strlen(suffix);

	if (str_len < suffix_len)
		return false;

	return !strncmp(str + str_len - suffix_len, suffix, suffix_len);
}

/*
 * The dynamic linker, or interpreter, operates within the process context
 * of the binary that necessitated dynamic linking.
 *
 * Consequently, process context identifiers (PID, comm, ...) cannot be used
 * to determine whether the current execution context belongs to the dynamic
 * linker.
 *
 * linker_ctx() deduces whether execution is currently in the dynamic linker's
 * context by correlating the current userspace instruction pointer with the
 * VMAs of the current task.
 *
 * Returns true if in linker context, otherwise false.
 *
 * Caller must hold mmap lock in read mode.
 */
static inline bool linker_ctx(void)
{
	struct pt_regs *regs = task_pt_regs(current);
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	struct file *file;

	if (!regs)
		return false;

	vma = find_vma(mm, instruction_pointer(regs));

	/* Current execution context, the VMA must be present */
	BUG_ON(!vma);

	file = vma->vm_file;
	if (!file)
		return false;

	if ((vma->vm_flags & VM_EXEC)) {
		char buf[64];
		const int bufsize = sizeof(buf);
		char *path;

		memset(buf, 0, bufsize);
		path = d_path(&file->f_path, buf, bufsize);

		if (!strcmp(path, "/system/bin/linker64"))
			return true;
	}

	return false;
}

/*
 * Saves the number of padding pages for an ELF segment mapping
 * in vm_flags.
 *
 * The number of padding pages is deduced from the madvise DONTNEED range [start, end)
 * if the following conditions are met:
 *    1) The range is enclosed by a single VMA
 *    2) The range ends at the end address of the VMA
 *    3) The range starts at an address greater than the start address of the VMA
 *    4) The number of pages in the range does not exceed VM_TOTAL_PAD_PAGES.
 *    5) The VMA is a file backed VMA.
 *    6) The file backing the VMA is a shared library (*.so)
 *    7) The madvise was requested by bionic's dynamic linker.
 */
void madvise_vma_pad_pages(struct vm_area_struct *vma,
			   unsigned long start, unsigned long end)
{
	unsigned long nr_pad_pages;

	if (!is_pgsize_migration_enabled())
		return;

	/*
	 * If the madvise range is at the end of the mapping, save the number of
	 * pages in vm_flags (only 4 bits are needed for up to 64kB aligned ELFs).
	 */
	if (start <= vma->vm_start || end != vma->vm_end)
		return;

	nr_pad_pages = (end - start) >> PAGE_SHIFT;

	if (!nr_pad_pages || nr_pad_pages > VM_TOTAL_PAD_PAGES)
		return;

	/* Only handle this for file backed VMAs */
	if (!vma->vm_file)
		return;

	/* Limit this to only shared libraries (*.so) */
	if (!str_has_suffix(vma->vm_file->f_path.dentry->d_name.name, ".so"))
		return;

	/* Only bionic's dynamic linker needs to hint padding pages. */
	if (!linker_ctx())
		return;

	vma_set_pad_pages(vma, nr_pad_pages);
}
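
/*
 * Illustrative sketch (an assumption about the userspace side, not something
 * defined in this file): to hint the padding, bionic's dynamic linker is
 * expected to issue an madvise(MADV_DONTNEED) covering exactly the padding
 * tail of the extended LOAD segment mapping, roughly:
 *
 *   madvise((void *)pad_start, pad_len, MADV_DONTNEED);
 *
 * where pad_start/pad_len (hypothetical names) describe the zero-fill region
 * past the file-backed content. Such a call reaches madvise_vma_pad_pages()
 * above with [start, end) == [pad_start, vma->vm_end), satisfying conditions
 * 1)-3) and 7) listed in the comment above.
 */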

static const char *pad_vma_name(struct vm_area_struct *vma)
{
	return "[page size compat]";
}

static const struct vm_operations_struct pad_vma_ops = {
	.name = pad_vma_name,
};

/*
 * Returns a new VMA representing the padding in @vma; returns NULL if @vma
 * has no padding.
 */
struct vm_area_struct *get_pad_vma(struct vm_area_struct *vma)
{
	struct vm_area_struct *pad;

	if (!is_pgsize_migration_enabled() || !(vma->vm_flags & VM_PAD_MASK))
		return NULL;

	pad = kzalloc(sizeof(struct vm_area_struct), GFP_KERNEL);

	*pad = *vma;

	/* Remove file */
	pad->vm_file = NULL;

	/* Add vm_ops->name */
	pad->vm_ops = &pad_vma_ops;

	/* Adjust the start to begin at the start of the padding section */
	pad->vm_start = VMA_PAD_START(pad);

	/* Make the pad vma PROT_NONE */
	pad->vm_flags &= ~(VM_READ|VM_WRITE|VM_EXEC);

	/* Remove padding bits */
	pad->vm_flags &= ~VM_PAD_MASK;

	return pad;
}

/*
 * Returns a new VMA excluding the padding from @vma; if there is no padding
 * in @vma, returns @vma.
 */
struct vm_area_struct *get_data_vma(struct vm_area_struct *vma)
{
	struct vm_area_struct *data;

	if (!is_pgsize_migration_enabled() || !(vma->vm_flags & VM_PAD_MASK))
		return vma;

	data = kzalloc(sizeof(struct vm_area_struct), GFP_KERNEL);

	*data = *vma;

	/* Adjust the end to the start of the padding section */
	data->vm_end = VMA_PAD_START(data);

	return data;
}

/*
 * Calls the show_pad_vma_fn on the @pad VMA, and frees the copies of @vma
 * and @pad.
 */
void show_map_pad_vma(struct vm_area_struct *vma, struct vm_area_struct *pad,
		      struct seq_file *m, show_pad_vma_fn func)
{
	if (!pad)
		return;

	/*
	 * This cannot happen. @pad is a section of the original VMA.
	 * Therefore @vma cannot be null if @pad is not null.
	 */
	BUG_ON(!vma);

	/*
	 * This cannot happen. If @pad vma was allocated the corresponding
	 * @vma should have the VM_PAD_MASK bit(s) set.
	 */
	BUG_ON(!(vma->vm_flags & VM_PAD_MASK));

	func(m, pad);

	kfree(pad);
	kfree(vma);
}
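
/*
 * Illustrative example (hypothetical addresses and library name): with the
 * mitigation enabled, a padded shared-library mapping shows up in
 * /proc/<pid>/maps as the data portion followed by a PROT_NONE entry named
 * by pad_vma_ops above, roughly:
 *
 *   7f...000-7f...000 r--p 00000000 fe:00 1234   /system/lib64/libfoo.so
 *   7f...000-7f...000 ---p 00000000 00:00 0      [page size compat]
 */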

/*
 * When splitting a padding VMA there are a couple of cases to handle.
 *
 * Given:
 *
 *     | DDDDPPPP |
 *
 * where:
 *     - D represents 1 page of data;
 *     - P represents 1 page of padding;
 *     - | represents the boundaries (start/end) of the VMA
 *
 *
 * 1) Split exactly at the padding boundary
 *
 *     | DDDDPPPP | --> | DDDD | PPPP |
 *
 *     - Remove padding flags from the first VMA.
 *     - The second VMA is all padding.
 *
 * 2) Split within the padding area
 *
 *     | DDDDPPPP | --> | DDDDPP | PP |
 *
 *     - Subtract the length of the second VMA from the first VMA's padding.
 *     - The second VMA is all padding; adjust its padding length (flags).
 *
 * 3) Split within the data area
 *
 *     | DDDDPPPP | --> | DD | DDPPPP |
 *
 *     - Remove padding flags from the first VMA.
 *     - The second VMA has the same padding as before the split.
 */
void split_pad_vma(struct vm_area_struct *vma, struct vm_area_struct *new,
		   unsigned long addr, int new_below)
{
	unsigned long nr_pad_pages = vma_pad_pages(vma);
	unsigned long nr_vma2_pages;
	struct vm_area_struct *first;
	struct vm_area_struct *second;

	if (!nr_pad_pages)
		return;

	if (new_below) {
		first = new;
		second = vma;
	} else {
		first = vma;
		second = new;
	}

	nr_vma2_pages = vma_pages(second);

	if (nr_vma2_pages == nr_pad_pages) {			/* Case 1 */
		first->vm_flags &= ~VM_PAD_MASK;
		vma_set_pad_pages(second, nr_pad_pages);
	} else if (nr_vma2_pages < nr_pad_pages) {		/* Case 2 */
		vma_set_pad_pages(first, nr_pad_pages - nr_vma2_pages);
		vma_set_pad_pages(second, nr_vma2_pages);
	} else {						/* Case 3 */
		first->vm_flags &= ~VM_PAD_MASK;
		vma_set_pad_pages(second, nr_pad_pages);
	}
}
#endif /* PAGE_SIZE == SZ_4K */
#endif /* CONFIG_64BIT */