// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2022 Huawei Device Co., Ltd.
 */

#include <asm/page.h>
#include <linux/mm.h>
#include <linux/mm_types.h>
#include <linux/radix-tree.h>
#include <linux/rmap.h>
#include <linux/slab.h>
#include <linux/oom.h> /* find_lock_task_mm */

#include <linux/mm_purgeable.h>

struct uxpte_t {
	atomic64_t val;
};

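/*
 * Each uxpte_t packs two fields into one 64-bit atomic value: bit 0 is
 * the present bit and bits 63:1 hold the user pin refcount. The special
 * value UXPTE_UNDER_RECLAIM (-UXPTE_REFCNT_ONE) marks an entry whose
 * page is being reclaimed.
 */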
#define UXPTE_SIZE_SHIFT 3
#define UXPTE_SIZE (1 << UXPTE_SIZE_SHIFT)

#define UXPTE_PER_PAGE_SHIFT (PAGE_SHIFT - UXPTE_SIZE_SHIFT)
#define UXPTE_PER_PAGE (1 << UXPTE_PER_PAGE_SHIFT)

#define UXPTE_PRESENT_BIT 1
#define UXPTE_PRESENT_MASK ((1 << UXPTE_PRESENT_BIT) - 1)
#define UXPTE_REFCNT_ONE (1 << UXPTE_PRESENT_BIT)
#define UXPTE_UNDER_RECLAIM (-UXPTE_REFCNT_ONE)

#define vpn(vaddr) ((vaddr) >> PAGE_SHIFT)
#define uxpte_pn(vaddr) (vpn(vaddr) >> UXPTE_PER_PAGE_SHIFT)
#define uxpte_off(vaddr) (vpn(vaddr) & (UXPTE_PER_PAGE - 1))
#define uxpn2addr(uxpn) ((uxpn) << (UXPTE_PER_PAGE_SHIFT + PAGE_SHIFT))
#define uxpte_refcnt(uxpte) ((uxpte) >> UXPTE_PRESENT_BIT)
#define uxpte_present(uxpte) ((uxpte) & UXPTE_PRESENT_MASK)

static inline long uxpte_read(struct uxpte_t *uxpte)
{
	return atomic64_read(&uxpte->val);
}

static inline void uxpte_set(struct uxpte_t *uxpte, long val)
{
	atomic64_set(&uxpte->val, val);
}

static inline bool uxpte_cas(struct uxpte_t *uxpte, long old, long new)
{
	return atomic64_cmpxchg(&uxpte->val, old, new) == old;
}

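/* Set up the per-mm uxpgd state: no radix tree allocated yet. */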
void mm_init_uxpgd(struct mm_struct *mm)
{
	mm->uxpgd = NULL;
	spin_lock_init(&mm->uxpgd_lock);
}

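/*
 * Drop every uxpte page referenced by the uxpgd radix tree and free the
 * tree itself (kfree(NULL) is a no-op when no tree was ever allocated).
 */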
void mm_clear_uxpgd(struct mm_struct *mm)
{
	struct page *page = NULL;
	void **slot = NULL;
	struct radix_tree_iter iter;

	spin_lock(&mm->uxpgd_lock);
	if (!mm->uxpgd)
		goto out;
	radix_tree_for_each_slot(slot, mm->uxpgd, &iter, 0) {
		page = radix_tree_delete(mm->uxpgd, iter.index);
		put_page(page);
	}
out:
	kfree(mm->uxpgd);
	mm->uxpgd = NULL;
	spin_unlock(&mm->uxpgd_lock);
}

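/*
 * Return the uxpte page that covers @addr, allocating a zeroed page and
 * inserting it at index uxpte_pn(addr) when @alloc is true. The
 * uxpgd_lock is dropped and re-taken around allocations, and the lookup
 * is repeated after the lock is reacquired.
 */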
/* Caller must hold mm->uxpgd_lock. */
static struct page *lookup_uxpte_page(struct vm_area_struct *vma,
	unsigned long addr, bool alloc)
{
	struct radix_tree_root *uxpgd = NULL;
	struct page *page = NULL;
	struct page *new_page = NULL;
	struct mm_struct *mm = vma->vm_mm;
	unsigned long uxpn = uxpte_pn(addr);

	if (mm->uxpgd)
		goto lookup;
	if (!alloc)
		goto out;
	spin_unlock(&mm->uxpgd_lock);
	uxpgd = kzalloc(sizeof(struct radix_tree_root), GFP_KERNEL);
	if (!uxpgd) {
		pr_err("uxpgd alloc failed.\n");
		spin_lock(&mm->uxpgd_lock);
		goto out;
	}
	INIT_RADIX_TREE(uxpgd, GFP_KERNEL);
	spin_lock(&mm->uxpgd_lock);
	if (mm->uxpgd)
		kfree(uxpgd);
	else
		mm->uxpgd = uxpgd;
lookup:
	page = radix_tree_lookup(mm->uxpgd, uxpn);
	if (page)
		goto out;
	if (!alloc)
		goto out;
	spin_unlock(&mm->uxpgd_lock);
	new_page = alloc_zeroed_user_highpage_movable(vma, addr);
	if (!new_page) {
		pr_err("uxpte page alloc fail.\n");
		spin_lock(&mm->uxpgd_lock);
		goto out;
	}
	if (radix_tree_preload(GFP_KERNEL)) {
		put_page(new_page);
		pr_err("radix preload fail.\n");
		spin_lock(&mm->uxpgd_lock);
		goto out;
	}
	spin_lock(&mm->uxpgd_lock);
	page = radix_tree_lookup(mm->uxpgd, uxpn);
	if (page) {
		put_page(new_page);
	} else {
		page = new_page;
		radix_tree_insert(mm->uxpgd, uxpn, page);
	}
	radix_tree_preload_end();
out:
	return page;
}

/* Caller must hold mm->uxpgd_lock. */
static struct uxpte_t *lookup_uxpte(struct vm_area_struct *vma,
		unsigned long addr, bool alloc)
{
	struct uxpte_t *uxpte = NULL;
	struct page *page = NULL;

	page = lookup_uxpte_page(vma, addr, alloc);
	if (!page)
		return NULL;
	uxpte = page_to_virt(page);

	return uxpte + uxpte_off(addr);
}

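/*
 * Try to transition the uxpte covering @addr to UXPTE_UNDER_RECLAIM,
 * allocating the uxpte page if necessary. An entry with a positive pin
 * refcount makes this fail and false is returned.
 */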
bool lock_uxpte(struct vm_area_struct *vma, unsigned long addr)
{
	struct uxpte_t *uxpte = NULL;
	long val = 0;

	spin_lock(&vma->vm_mm->uxpgd_lock);
	uxpte = lookup_uxpte(vma, addr, true);
	if (!uxpte)
		goto unlock;
retry:
	val = uxpte_read(uxpte);
	if (val >> 1)
		goto unlock;
	if (!uxpte_cas(uxpte, val, UXPTE_UNDER_RECLAIM))
		goto retry;
	val = UXPTE_UNDER_RECLAIM;
unlock:
	spin_unlock(&vma->vm_mm->uxpgd_lock);

	return val == UXPTE_UNDER_RECLAIM;
}

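/* Reset the uxpte covering @addr to 0: not present, not under reclaim. */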
void unlock_uxpte(struct vm_area_struct *vma, unsigned long addr)
{
	struct uxpte_t *uxpte = NULL;

	spin_lock(&vma->vm_mm->uxpgd_lock);
	uxpte = lookup_uxpte(vma, addr, false);
	if (!uxpte)
		goto unlock;
	uxpte_set(uxpte, 0);
unlock:
	spin_unlock(&vma->vm_mm->uxpgd_lock);
}

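/*
 * Set the present bit for the uxpte covering @addr, allocating the
 * uxpte page if needed. Returns true when the present bit is set on
 * return, false when no uxpte entry could be obtained.
 */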
bool uxpte_set_present(struct vm_area_struct *vma, unsigned long addr)
{
	struct uxpte_t *uxpte = NULL;
	long val = 0;

	spin_lock(&vma->vm_mm->uxpgd_lock);
	uxpte = lookup_uxpte(vma, addr, true);
	if (!uxpte)
		goto unlock;
retry:
	val = uxpte_read(uxpte);
	if (val & 1)
		goto unlock;
	if (!uxpte_cas(uxpte, val, val + 1))
		goto retry;
	val++;
unlock:
	spin_unlock(&vma->vm_mm->uxpgd_lock);

	return val & 1;
}

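/* Clear the present bit for the uxpte covering @addr, if it is set. */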
void uxpte_clear_present(struct vm_area_struct *vma, unsigned long addr)
{
	struct uxpte_t *uxpte = NULL;
	long val = 0;

	spin_lock(&vma->vm_mm->uxpgd_lock);
	uxpte = lookup_uxpte(vma, addr, false);
	if (!uxpte)
		goto unlock;
retry:
	val = uxpte_read(uxpte);
	if (!(val & 1))
		goto unlock;
	if (!uxpte_cas(uxpte, val, val - 1))
		goto retry;
unlock:
	spin_unlock(&vma->vm_mm->uxpgd_lock);
}

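/*
 * Fault handler for the uxpte-backed VMA: translate the faulting
 * address into a uxpn (vm_pgoff plus the page offset within the VMA),
 * look up or allocate the backing uxpte page, and hand the prepared pte
 * back to the caller through @entry.
 */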
vm_fault_t do_uxpte_page_fault(struct vm_fault *vmf, pte_t *entry)
{
	struct vm_area_struct *vma = vmf->vma;
	unsigned long vma_uxpn = vma->vm_pgoff;
	unsigned long off_uxpn = vpn(vmf->address - vma->vm_start);
	unsigned long addr = uxpn2addr(vma_uxpn + off_uxpn);
	struct page *page = NULL;

	if (unlikely(anon_vma_prepare(vma)))
		return VM_FAULT_OOM;

	spin_lock(&vma->vm_mm->uxpgd_lock);
	page = lookup_uxpte_page(vma, addr, true);
	spin_unlock(&vma->vm_mm->uxpgd_lock);

	if (!page)
		return VM_FAULT_OOM;

	*entry = mk_pte(page, vma->vm_page_prot);
	*entry = pte_sw_mkyoung(*entry);
	if (vma->vm_flags & VM_WRITE)
		*entry = pte_mkwrite(pte_mkdirty(*entry));
	return 0;
}

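/*
 * Walk every uxpte page in @mm's uxpgd and count entries with the
 * present bit set (total purgeable pages) and, among those, entries
 * with a positive refcount (pages pinned by userspace).
 */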
static void __mm_purg_pages_info(struct mm_struct *mm, unsigned long *total_purg_pages,
	unsigned long *pined_purg_pages)
{
	struct page *page = NULL;
	void **slot = NULL;
	struct radix_tree_iter iter;
	struct uxpte_t *uxpte = NULL;
	long pte_entry = 0;
	int index = 0;
	unsigned long nr_total = 0, nr_pined = 0;

	spin_lock(&mm->uxpgd_lock);
	if (!mm->uxpgd)
		goto out;
	radix_tree_for_each_slot(slot, mm->uxpgd, &iter, 0) {
		page = radix_tree_deref_slot(slot);
		if (unlikely(!page))
			continue;
		uxpte = page_to_virt(page);
		for (index = 0; index < UXPTE_PER_PAGE; index++) {
			pte_entry = uxpte_read(&(uxpte[index]));
			if (uxpte_present(pte_entry) == 0) /* not present */
				continue;
			nr_total++;
			if (uxpte_refcnt(pte_entry) > 0) /* pinned by user */
				nr_pined++;
		}
	}
out:
	spin_unlock(&mm->uxpgd_lock);

	if (total_purg_pages)
		*total_purg_pages = nr_total;

	if (pined_purg_pages)
		*pined_purg_pages = nr_pined;
}

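/* Per-mm wrapper: bail out on a NULL mm or when no output is requested. */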
void mm_purg_pages_info(struct mm_struct *mm, unsigned long *total_purg_pages,
	unsigned long *pined_purg_pages)
{
	if (unlikely(!mm))
		return;

	if (!total_purg_pages && !pined_purg_pages)
		return;

	__mm_purg_pages_info(mm, total_purg_pages, pined_purg_pages);
}

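/*
 * System-wide totals: iterate all processes under RCU, take each task's
 * mm via find_lock_task_mm() and accumulate its purgeable and pinned
 * page counts, logging per-task and summary figures.
 */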
void purg_pages_info(unsigned long *total_purg_pages, unsigned long *pined_purg_pages)
{
	struct task_struct *p = NULL;
	struct task_struct *tsk = NULL;
	unsigned long mm_nr_purge = 0, mm_nr_pined = 0;
	unsigned long nr_total = 0, nr_pined = 0;

	if (!total_purg_pages && !pined_purg_pages)
		return;

	if (total_purg_pages)
		*total_purg_pages = 0;

	if (pined_purg_pages)
		*pined_purg_pages = 0;

	rcu_read_lock();
	for_each_process(p) {
		tsk = find_lock_task_mm(p);
		if (!tsk) {
			/*
			 * It is a kthread or all of p's threads have already
			 * detached their mm's.
			 */
			continue;
		}
		__mm_purg_pages_info(tsk->mm, &mm_nr_purge, &mm_nr_pined);
		nr_total += mm_nr_purge;
		nr_pined += mm_nr_pined;
		task_unlock(tsk);

		if (mm_nr_purge > 0) {
			pr_info("purgemm: tsk: %s %lu pinned in %lu pages\n", tsk->comm ?: "NULL",
				mm_nr_pined, mm_nr_purge);
		}
	}
	rcu_read_unlock();
	if (total_purg_pages)
		*total_purg_pages = nr_total;

	if (pined_purg_pages)
		*pined_purg_pages = nr_pined;
	pr_info("purgemm: Sum: %lu pinned in %lu pages\n", nr_pined, nr_total);
}